import os
import pandas as pd
import pandasql as ps
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

# Carrega as variáveis de ambiente do arquivo .env
load_dotenv()

True

# =======================================================
# Variáveis de acesso ao banco de dados
# =======================================================

USER = os.environ.get("DB_USER")
PASSWORD = os.environ.get("DB_PASSWORD")
DATABASE = os.environ.get("DB_NAME")
SGBD = "postgresql+psycopg2"
HOST = "localhost"

# =======================================================
# Nome do Schema
# =======================================================

SCHEMA = "ada"

# =======================================================
# Caminho para os arquivos csv
# =======================================================

FILE_PATH = "assets"


# =======================================================
# Define a quantidade de usuários e o tamanho do chunk
# =======================================================

QTD_USUARIOS = 20000
QTD_ELEMENTOS = 500000

# Configura para mostrar todas as colunas do dataframe sem truncar
pd.set_option('display.max_columns', 100)
# Configura para mostrar até 100 caracteres na coluna
pd.set_option('display.max_colwidth', 100)

# Carrega o dataset na variável anime
anime = pd.read_csv(f"{FILE_PATH}/anime.csv")

# Mostra os cinco primeiros registros do dataframe
anime.head()

# Informa a quantidade de registros e atributos do dataframe
print(f'A tabela anime possui {anime.shape[0]} linhas e {anime.shape[1]} colunas')

A tabela anime possui 17562 linhas e 35 colunas

# Mostra o nome e o tipo de dados de cada coluna verificando se há valores nulos
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17562 entries, 0 to 17561
Data columns (total 35 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   MAL_ID         17562 non-null  int64 
 1   Name           17562 non-null  object
 2   Score          17562 non-null  object
 3   Genres         17562 non-null  object
 4   English name   17562 non-null  object
 5   Japanese name  17562 non-null  object
 6   Type           17562 non-null  object
 7   Episodes       17562 non-null  object
 8   Aired          17562 non-null  object
 9   Premiered      17562 non-null  object
 10  Producers      17562 non-null  object
 11  Licensors      17562 non-null  object
 12  Studios        17562 non-null  object
 13  Source         17562 non-null  object
 14  Duration       17562 non-null  object
 15  Rating         17562 non-null  object
 16  Ranked         17562 non-null  object
 17  Popularity     17562 non-null  int64 
 18  Members        17562 non-null  int64 
 19  Favorites      17562 non-null  int64 
 20  Watching       17562 non-null  int64 
 21  Completed      17562 non-null  int64 
 22  On-Hold        17562 non-null  int64 
 23  Dropped        17562 non-null  int64 
 24  Plan to Watch  17562 non-null  int64 
 25  Score-10       17562 non-null  object
 26  Score-9        17562 non-null  object
 27  Score-8        17562 non-null  object
 28  Score-7        17562 non-null  object
 29  Score-6        17562 non-null  object
 30  Score-5        17562 non-null  object
 31  Score-4        17562 non-null  object
 32  Score-3        17562 non-null  object
 33  Score-2        17562 non-null  object
 34  Score-1        17562 non-null  object
dtypes: int64(9), object(26)
memory usage: 4.7+ MB

# Outra forma de verificar a quantidade de valores ausentes
anime.isnull().sum()

MAL_ID           0
Name             0
Score            0
Genres           0
English name     0
Japanese name    0
Type             0
Episodes         0
Aired            0
Premiered        0
Producers        0
Licensors        0
Studios          0
Source           0
Duration         0
Rating           0
Ranked           0
Popularity       0
Members          0
Favorites        0
Watching         0
Completed        0
On-Hold          0
Dropped          0
Plan to Watch    0
Score-10         0
Score-9          0
Score-8          0
Score-7          0
Score-6          0
Score-5          0
Score-4          0
Score-3          0
Score-2          0
Score-1          0
dtype: int64

# Obtem o resumo estatístico da tabela
anime.describe().T

# Forma manual de obter o resumo estatístico
resumo_estatistica_anime_status = pd.DataFrame()
resumo_estatistica_anime_status['Média'] = anime[['Watching', 'Completed', 'On-Hold', 'Dropped', 'Plan to Watch']].mean(numeric_only=True)
resumo_estatistica_anime_status['Mediana'] = anime[['Watching', 'Completed', 'On-Hold', 'Dropped', 'Plan to Watch']].median(numeric_only=True)
resumo_estatistica_anime_status['Moda'] = anime[['Watching', 'Completed', 'On-Hold', 'Dropped', 'Plan to Watch']].mode(numeric_only=True).T
resumo_estatistica_anime_status['Desvio Padrão'] = anime[['Watching', 'Completed', 'On-Hold', 'Dropped', 'Plan to Watch']].std(numeric_only=True)
resumo_estatistica_anime_status['Máximo'] = anime[['Watching', 'Completed', 'On-Hold', 'Dropped', 'Plan to Watch']].max(numeric_only=True)
resumo_estatistica_anime_status['Mínimo'] = anime[['Watching', 'Completed', 'On-Hold', 'Dropped', 'Plan to Watch']].min(numeric_only=True)
resumo_estatistica_anime_status

# Mostra o valor mais frequente e a quantidade de valores únicos de cada coluna do tipo object
anime.describe(include=object).T

# Forma manual de obter a quantidade de valores únicos por coluna
colunas_anime = ['Name', 'Premiered', 'Source', 'Type', 'Duration', 'Studios']
valores_unicos = pd.DataFrame(columns=['Nome', 'Qtd_valores_unicos', 'valores_unicos'])

for coluna in colunas_anime:
    if anime[coluna].dtype == object:
        valores_unicos.loc[len(valores_unicos)] = {'Nome': coluna, 'Qtd_valores_unicos': len(anime[coluna].unique()), "valores_unicos": ";".join(anime[coluna].unique())}

valores_unicos

# Carrega o dataset na variável anime_with_synopsis
anime_with_synopsis = pd.read_csv(f"{FILE_PATH}/anime_with_synopsis.csv")

# Mostra as primeiras cinco linhas do dataframe
anime_with_synopsis.head()

# Informa a quantidade de registros e atributos do dataframe
print(f'A tabela anime_with_synopsis possui {anime_with_synopsis.shape[0]} linhas e {anime_with_synopsis.shape[1]} colunas')

A tabela anime_with_synopsis possui 16214 linhas e 5 colunas

# Mostra o nome e o tipo de dados de cada coluna verificando se há valores nulos
anime_with_synopsis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16214 entries, 0 to 16213
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   MAL_ID     16214 non-null  int64 
 1   Name       16214 non-null  object
 2   Score      16214 non-null  object
 3   Genres     16214 non-null  object
 4   sypnopsis  16206 non-null  object
dtypes: int64(1), object(4)
memory usage: 633.5+ KB

# Outra forma de verificar a quantidade de valores ausentes
anime_with_synopsis.isnull().sum()

MAL_ID       0
Name         0
Score        0
Genres       0
sypnopsis    8
dtype: int64

# Como há nulos na coluna synopsis, optei por preenchê-la com o valor padrão "No synopsis information ..."
anime_with_synopsis['sypnopsis'] = anime_with_synopsis['sypnopsis'].fillna('No synopsis information has been added to this title. Help improve our database by adding a synopsis here .')

# Verificar novamente se há valores nulos
anime_with_synopsis.isnull().sum()

MAL_ID       0
Name         0
Score        0
Genres       0
sypnopsis    0
dtype: int64

# Mostra o valor mais frequente e a quantidade de valores únicos de cada coluna do tipo object
anime_with_synopsis.describe(include=object).T

"""
A tabela animelist original possui dados de 325.772 usuários diferentes totalizando 109 milhões de registros,
para ser viável executar esse dataset na minha máquina local, serão utilizados apenas dados dos primeiros
20000 usuários.
"""

# Cria um dataframe vazio
anime_list = pd.DataFrame()

""" 
Percorre o arquivo lendo qtd_elementos por vez filtrando os registros com user_id menor que a quantidade de usuários
pré definida para então concatenar com o dataframe anime_list
"""
for amostra in pd.read_csv(f"{FILE_PATH}/animelist.csv", chunksize=QTD_ELEMENTOS):
    amostra_filtrada = amostra[amostra['user_id'] < QTD_USUARIOS]
    anime_list = pd.concat([anime_list, amostra_filtrada])

# Mostra as primeiras cinco linhas do dataframe
anime_list.head()

# Informa a quantidade de registros e atributos do dataframe
print(f'A tabela animelist possui {anime_list.shape[0]} linhas e {anime_list.shape[1]} colunas')

A tabela animelist possui 6094515 linhas e 5 colunas

# Mostra o nome e o tipo de dados de cada coluna verificando se há valores nulos
anime_list.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6094515 entries, 0 to 6094514
Data columns (total 5 columns):
 #   Column            Dtype
---  ------            -----
 0   user_id           int64
 1   anime_id          int64
 2   rating            int64
 3   watching_status   int64
 4   watched_episodes  int64
dtypes: int64(5)
memory usage: 279.0 MB

# Outra forma de verificar a quantidade de valores ausentes
anime_list.isnull().sum()

user_id             0
anime_id            0
rating              0
watching_status     0
watched_episodes    0
dtype: int64

# Obtêm o resumo estatítico das avaliações e das quantidades de episódios assistidos
anime_list[['rating', 'watched_episodes']].describe().round(2)

# Verifica a frequência de usuários por watching status
anime_list['watching_status'].value_counts()

watching_status
2    3817977
6    1542507
1     290837
4     236033
3     207003
0        158
Name: count, dtype: int64

"""
A tabela rating_complete original possui dados de 310.059 usuários diferentes totalizando 57 milhões de registros,
para ser viável executar esse dataset na minha máquina local, serão utilizados apenas dados dos primeiros
20000 usuários.
"""

# Cria um dataframe vazio
rating_complete = pd.DataFrame()

""" 
Percorre o arquivo lendo qtd_elementos por vez filtrando os registros com user_id menor que a quantidade de usuários
pré definida para então concatenar com o dataframe rating_complete
"""
for amostra in pd.read_csv(f"{FILE_PATH}/rating_complete.csv", chunksize=QTD_ELEMENTOS):
    amostra_filtrada = amostra[amostra['user_id'] < QTD_USUARIOS]
    rating_complete = pd.concat([rating_complete, amostra_filtrada])

# Mostra as primeiras cinco linhas do dataframe
rating_complete.head()

# Informa a quantidade de registros e atributos do dataframe
print(f'A tabela rating_complete possui {rating_complete.shape[0]} linhas e {rating_complete.shape[1]} colunas')

A tabela rating_complete possui 3256216 linhas e 3 colunas

# Mostra o nome e o tipo de dados de cada coluna verificando se há valores nulos
rating_complete.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3256216 entries, 0 to 3256215
Data columns (total 3 columns):
 #   Column    Dtype
---  ------    -----
 0   user_id   int64
 1   anime_id  int64
 2   rating    int64
dtypes: int64(3)
memory usage: 99.4 MB

# Outra forma de verificar a quantidade de valores ausentes
rating_complete.isnull().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

# Obtêm o resumo estatístico da coluna rating
rating_complete['rating'].describe().round(2)

count    3256216.00
mean           7.51
std            1.69
min            1.00
25%            7.00
50%            8.00
75%            9.00
max           10.00
Name: rating, dtype: float64

# Carrega o dataset na variável watching_status
watching_status = pd.read_csv(f"{FILE_PATH}/watching_status.csv")

# Mostra as primeiras cinco linhas do dataframe
watching_status.head()

# Informa a quantidade de registros e atributos do dataframe
print(f'A tabela watching_status possui {watching_status.shape[0]} linhas e {watching_status.shape[1]} colunas')

A tabela watching_status possui 5 linhas e 2 colunas

# Mostra o nome e o tipo de dados de cada coluna verificando se há valores nulos
watching_status.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   status        5 non-null      int64 
 1    description  5 non-null      object
dtypes: int64(1), object(1)
memory usage: 208.0+ bytes

# Outra forma de verificar a quantidade de valores ausentes
watching_status.isnull().sum()

status          0
 description    0
dtype: int64

"""
Considerei a tabela anime_with_synopsis desnecessária, já que sua única adição é a coluna synopsis,
porém ela ainda não substitui completamente a tabela anime, pois faltam algumas colunas essenciais em
anime_with_synopsis. Então, decidi trazer apenas a coluna synopsis para a tabela anime.
"""
query = """
SELECT 
    -- anime --
    a.MAL_ID AS anime_id,
    a.Name AS name,
    CASE
        WHEN a.Score = 'Unknown' THEN -1
        ELSE CAST(a.Score AS FLOAT)
    END AS score,
    a.Genres AS genres,
    a."English name" AS english_name,
    a."Japanese name" AS japanese_name,
    a.Type AS type,
    CASE
        WHEN a.Episodes = 'Unknown' THEN -1
        ELSE CAST(a.Episodes AS INTEGER)
    END AS episodes,
    a.Aired AS aired,
    a.Premiered AS premiered,
    a.Producers AS producers,
    a.Licensors AS licensors,
    a.Studios AS studios,
    a.Source AS source,
    a.Duration AS duration,
    a.Rating AS rating,
    CASE
        WHEN a.Ranked = 'Unknown' THEN -1
        ELSE CAST(a.Ranked AS INTEGER)
    END AS ranked,
    a.Popularity AS popularity,
    a.Members AS members,
    a.Favorites AS favorites,
    
    -- anime_with_synopsis --
    sypnopsis AS synopsis
FROM 
    anime AS a
LEFT JOIN 
    anime_with_synopsis AS aws
ON a.MAL_ID = aws.MAL_ID
"""

new_anime_with_synopsis = ps.sqldf(query)

# Mostra as primeiras cinco linhas do dataframe
new_anime_with_synopsis.head()

""" 
A coluna description no dataframe watching_status possui um espaço no início  
"""

query = """ 
SELECT 
    status,
    " description" AS description
FROM
    watching_status
"""

watching_status = ps.sqldf(query)

# Mostras as cinco primeiras linhas
watching_status.head()

# SGBD://USER:PASSWORD@HOST/DATABASE
engine = create_engine(f"{SGBD}://{USER}:{PASSWORD}@{HOST}:5432/{DATABASE}")

# comando_ddl = f"""
# CREATE SCHEMA {SCHEMA}
# """

# with engine.connect() as con:
#     con.execute(text(comando_ddl))
#     con.commit()

comando_ddl = """ 
CREATE TABLE IF NOT EXISTS ada.anime
(
    anime_id INTEGER NOT NULL,
    name VARCHAR(500),
    score FLOAT,
    genres VARCHAR(1000),
    english_name VARCHAR(500),
    japanese_name VARCHAR(500),
    type VARCHAR(500),
    episodes INTEGER,
    aired VARCHAR(500),
    premiered VARCHAR(500),
    producers VARCHAR(1000),
    licensors VARCHAR(1000),
    studios VARCHAR(1000),
    source VARCHAR(500),
    duration VARCHAR(500),
    rating VARCHAR(500),
    ranked INTEGER,
    popularity INTEGER,
    members INTEGER,
    favorites INTEGER,
    synopsis VARCHAR(10000),
    PRIMARY KEY (anime_id)
)
"""

with engine.connect() as con:
    con.execute(text(comando_ddl))
    con.commit()

# new_anime_with_synopsis.to_sql('anime', con=engine, if_exists='append', index=False, schema=SCHEMA)

comando_ddl = """
CREATE TABLE IF NOT EXISTS ada.watching_status
(
    status INTEGER NOT NULL,
    description VARCHAR(500),
    PRIMARY KEY (status)
)
"""

with engine.connect() as con:
    con.execute(text(comando_ddl))
    con.commit()

# watching_status.to_sql('watching_status', con=engine, if_exists='append', index=False, schema=SCHEMA)

comando_ddl = """
CREATE TABLE IF NOT EXISTS ada.anime_list
(
    user_id INTEGER NOT NULL,
    anime_id INTEGER NOT NULL,
    rating INTEGER,
    watching_status INTEGER,
    watched_episodes INTEGER,
    PRIMARY KEY (user_id, anime_id),
    FOREIGN KEY (anime_id) REFERENCES ada.anime (anime_id) ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (watching_status) REFERENCES ada.watching_status (status) ON DELETE CASCADE ON UPDATE CASCADE
)
 """
 
with engine.connect() as con:
    con.execute(text(comando_ddl))
    con.commit()

# anime_list.to_sql('anime_list', con=engine, if_exists='replace', index=False, schema=SCHEMA)

query = f"""
SELECT 
    -- anime --
    type,
    
    -- agrupamento --
    COUNT(*) AS qtd_animes
FROM 
    {SCHEMA}.anime
WHERE 
    score > 7.0
GROUP BY 
    type
ORDER BY 2 DESC
"""

pd.read_sql_query(query, con=engine)

query = f"""
SELECT 
    name,
    score,
    episodes,
    popularity,
    synopsis
FROM 
    {SCHEMA}.anime
WHERE 
    synopsis LIKE '%%demon%%'
ORDER BY popularity ASC
LIMIT 10
"""

pd.read_sql_query(query, con=engine)

query = f"""
SELECT 
    name,
    score,
    genres,
    episodes
FROM 
    {SCHEMA}.anime
WHERE 
    genres LIKE '%%Mystery%%' AND
    genres LIKE '%%Supernatural%%' AND 
    episodes > 13
ORDER BY score DESC
LIMIT 10
"""

pd.read_sql_query(query, con=engine)

query = f"""

WITH status_complete AS (
    SELECT
        -- anime_list --
        anime_id,
        
        -- watching_status --
        ws.description AS description,
        
        -- agrupamento --
        COUNT(*) AS watched
    FROM
        {SCHEMA}.anime_list as al
    LEFT JOIN 
        {SCHEMA}.watching_status as ws
    ON al.watching_status = ws.status
    GROUP BY anime_id, ws.description
)

SELECT 
    -- anime --
    a.anime_id,
    name,
    genres,
    premiered,
    
    -- status_completed --
    description,
    watched
FROM 
    status_complete AS sc
LEFT JOIN
    {SCHEMA}.anime AS a
ON sc.anime_id = a.anime_id
WHERE 
    description = 'Completed' AND 
    name LIKE 'D%%' AND
    (premiered LIKE '%%2011%%' OR
    premiered LIKE '%%2012%%' OR
    premiered LIKE '%%2013%%' OR
    premiered LIKE '%%2014%%' OR
    premiered LIKE '%%2015%%' OR
    premiered LIKE '%%2016%%' OR
    premiered LIKE '%%2017%%') AND
    (genres LIKE '%%Horror%%' OR
    genres LIKE '%%Sci-Fi%%')
ORDER BY watched DESC
LIMIT 10
"""

pd.read_sql_query(query, con=engine)

query = f"""
SELECT 
    name,
    score,
    studios,
    duration,
    premiered
FROM 
    {SCHEMA}.anime
WHERE 
    studios LIKE '%%Studio Pierrot%%' AND 
    (duration = '21 min. per ep.' OR 
    duration = '23 min. per ep.') AND
    (premiered LIKE 'Spring%%' OR 
    premiered LIKE 'Fall%%')
"""

pd.read_sql_query(query, con=engine)

query = f"""
SELECT 
    -- anime_list --
    al.user_id,
    al.watched_episodes,
    
    -- watching_status --
    description,
    
    -- anime --
    name,
    episodes,
    
    -- calculado --
    CONCAT(watched_episodes * 100 / episodes, '%%') AS progresso
FROM 
    {SCHEMA}.anime_list as al
LEFT JOIN 
    {SCHEMA}.anime AS a
ON 
    al.anime_id = a.anime_id
LEFT JOIN 
    {SCHEMA}.watching_status AS ws
ON 
    al.watching_status = ws.status
WHERE 
    a.name = 'Claymore' AND
    ws.description = 'Currently Watching' AND 
    watched_episodes >= (0.5 * episodes)
ORDER BY watched_episodes ASC
"""

pd.read_sql_query(query, con=engine)

query = f"""
SELECT 
    -- anime --
    a.source,
    
    -- anime_list --
    AVG(al.rating) AS rating_avg
FROM 
    {SCHEMA}.anime_list as al
LEFT JOIN 
    {SCHEMA}.anime as a
ON 
    al.anime_id = a.anime_id
LEFT JOIN 
    {SCHEMA}.watching_status as ws
ON 
    al.watching_status = ws.status
WHERE 
    ws.description = 'Completed'
GROUP BY a.source
ORDER BY rating_avg DESC
"""

pd.read_sql_query(query, con=engine)

query = f"""
WITH anime_media AS (
    SELECT 
        -- anime_list --
        anime_id,
        AVG(rating) AS rating_avg,
        COUNT(*) AS qt_dropped
    FROM 
        {SCHEMA}.anime_list as al
    LEFT JOIN 
        {SCHEMA}.watching_status as ws
    ON 
        al.watching_status = ws.status
    WHERE 
        description = 'Dropped' AND 
        watched_episodes <= 1
    GROUP BY anime_id
)

SELECT 
    -- anime --
    a.anime_id,
    name,
    score,
    
    -- anime_media --
    rating_avg,
    qt_dropped
FROM 
    anime_media as am
LEFT JOIN 
    {SCHEMA}.anime as a
ON 
    am.anime_id = a.anime_id
WHERE 
    rating_avg < 5.0
ORDER BY qt_dropped DESC
"""

pd.read_sql_query(query, con=engine)

query = f""" 

WITH anime_contagem_10 AS (
    SELECT 
        -- anime_list --
        anime_id,
        COUNT(rating) as score_10
    FROM
        {SCHEMA}.anime_list
    WHERE 
        rating = 10
    GROUP BY anime_id
)

SELECT 
    -- anime_contagem_10 --
    ac.*,
    
    -- anime --
    name,
    genres,
    premiered
FROM
    anime_contagem_10 as ac
LEFT JOIN 
    {SCHEMA}.anime as a
ON
    ac.anime_id = a.anime_id
WHERE 
    source = 'Manga' AND
    genres LIKE '%%Shoujo%%' AND 
    (premiered LIKE '%%2015%%' OR
    premiered LIKE '%%2016%%' OR 
    premiered LIKE '%%2017%%' OR
    premiered LIKE '%%2018%%' OR
    premiered LIKE '%%2019%%')
ORDER BY score_10 DESC
LIMIT 10
"""

pd.read_sql_query(query, con=engine)

query = f"""
SELECT 
    anime_id,
    name,
    score,
    genres,
    type,
    aired
FROM 
    {SCHEMA}.anime
WHERE 
    type = 'Movie' AND
    genres LIKE '%%Drama%%' AND
    aired LIKE '%%2016%%'
ORDER BY score DESC
LIMIT 10
"""

pd.read_sql_query(query, con=engine)

	MAL_ID	Name	Score	Genres	English name	Japanese name	Type	Episodes	Aired	Premiered	Producers	Licensors	Studios	Source	Duration	Rating	Ranked	Popularity	Members	Favorites	Watching	Completed	On-Hold	Dropped	Plan to Watch	Score-10	Score-9	Score-8	Score-7	Score-6	Score-5	Score-4	Score-3	Score-2	Score-1
0	1	Cowboy Bebop	8.78	Action, Adventure, Comedy, Drama, Sci-Fi, Space	Cowboy Bebop	カウボーイビバップ	TV	26	Apr 3, 1998 to Apr 24, 1999	Spring 1998	Bandai Visual	Funimation, Bandai Entertainment	Sunrise	Original	24 min. per ep.	R - 17+ (violence & profanity)	28.0	39	1251960	61971	105808	718161	71513	26678	329800	229170.0	182126.0	131625.0	62330.0	20688.0	8904.0	3184.0	1357.0	741.0	1580.0
1	5	Cowboy Bebop: Tengoku no Tobira	8.39	Action, Drama, Mystery, Sci-Fi, Space	Cowboy Bebop:The Movie	カウボーイビバップ天国の扉	Movie	1	Sep 1, 2001	Unknown	Sunrise, Bandai Visual	Sony Pictures Entertainment	Bones	Original	1 hr. 55 min.	R - 17+ (violence & profanity)	159.0	518	273145	1174	4143	208333	1935	770	57964	30043.0	49201.0	49505.0	22632.0	5805.0	1877.0	577.0	221.0	109.0	379.0
2	6	Trigun	8.24	Action, Sci-Fi, Adventure, Comedy, Drama, Shounen	Trigun	トライガン	TV	26	Apr 1, 1998 to Sep 30, 1998	Spring 1998	Victor Entertainment	Funimation, Geneon Entertainment USA	Madhouse	Manga	24 min. per ep.	PG-13 - Teens 13 or older	266.0	201	558913	12944	29113	343492	25465	13925	146918	50229.0	75651.0	86142.0	49432.0	15376.0	5838.0	1965.0	664.0	316.0	533.0
3	7	Witch Hunter Robin	7.27	Action, Mystery, Police, Supernatural, Drama, Magic	Witch Hunter Robin	Witch Hunter ROBIN (ウイッチハンターロビン)	TV	26	Jul 2, 2002 to Dec 24, 2002	Summer 2002	TV Tokyo, Bandai Visual, Dentsu, Victor Entertainment	Funimation, Bandai Entertainment	Sunrise	Original	25 min. per ep.	PG-13 - Teens 13 or older	2481.0	1467	94683	587	4300	46165	5121	5378	33719	2182.0	4806.0	10128.0	11618.0	5709.0	2920.0	1083.0	353.0	164.0	131.0
4	8	Bouken Ou Beet	6.98	Adventure, Fantasy, Shounen, Supernatural	Beet the Vandel Buster	冒険王ビィト	TV	52	Sep 30, 2004 to Sep 29, 2005	Fall 2004	TV Tokyo, Dentsu	Unknown	Toei Animation	Manga	23 min. per ep.	PG - Children	3710.0	4369	13224	18	642	7314	766	1108	3394	312.0	529.0	1242.0	1713.0	1068.0	634.0	265.0	83.0	50.0	27.0

	count	mean	std	min	25%	50%	75%	max
MAL_ID	17562.0	21477.192347	14900.093170	1.0	5953.5	22820.0	35624.75	48492.0
Popularity	17562.0	8763.452340	5059.327278	0.0	4383.5	8762.5	13145.00	17565.0
Members	17562.0	34658.539517	125282.141428	1.0	336.0	2065.0	13223.25	2589552.0
Favorites	17562.0	457.746270	4063.473313	0.0	0.0	3.0	31.00	183914.0
Watching	17562.0	2231.487758	14046.688133	0.0	13.0	73.0	522.00	887333.0
Completed	17562.0	22095.571347	91009.188504	0.0	111.0	817.5	6478.00	2182587.0
On-Hold	17562.0	955.049653	4275.675096	0.0	6.0	45.0	291.75	187919.0
Dropped	17562.0	1176.599533	4740.348653	0.0	37.0	77.0	271.00	174710.0
Plan to Watch	17562.0	8199.831227	23777.691963	1.0	112.0	752.5	4135.50	425531.0

	Média	Mediana	Moda	Desvio Padrão	Máximo	Mínimo
Watching	2231.487758	73.0	6	14046.688133	887333	0
Completed	22095.571347	817.5	0	91009.188504	2182587	0
On-Hold	955.049653	45.0	0	4275.675096	187919	0
Dropped	1176.599533	77.0	0	4740.348653	174710	0
Plan to Watch	8199.831227	752.5	26	23777.691963	425531	1

	MAL_ID	Name	Score	Genres	sypnopsis
0	1	Cowboy Bebop	8.78	Action, Adventure, Comedy, Drama, Sci-Fi, Space	In the year 2071, humanity has colonized several of the planets and moons of the solar system le...
1	5	Cowboy Bebop: Tengoku no Tobira	8.39	Action, Drama, Mystery, Sci-Fi, Space	other day, another bounty—such is the life of the often unlucky crew of the Bebop. However, this...
2	6	Trigun	8.24	Action, Sci-Fi, Adventure, Comedy, Drama, Shounen	Vash the Stampede is the man with a $$60,000,000,000 bounty on his head. The reason: he's a merc...
3	7	Witch Hunter Robin	7.27	Action, Mystery, Police, Supernatural, Drama, Magic	ches are individuals with special powers like ESP, telekinesis, mind control, etc. Robin, a 15-y...
4	8	Bouken Ou Beet	6.98	Adventure, Fantasy, Shounen, Supernatural	It is the dark century and the people are suffering under the rule of the devil, Vandel, who is ...

	anime_id	rating
0	430	9
1	1004	5
2	3010	7
3	570	7
4	2762	9

Projeto Final do Módulo de Banco de Dados I¶

Descrição:¶

Regras:¶

Grupo composto por:¶

Sobre a base de dados escolhida¶

Importações¶

Configurações Iniciais¶

dotenv¶

Variáveis¶

Pandas¶

Análise inicial das tabelas¶

Anime¶

Anime with Synopsis¶

Anime List¶

Rating Complete¶

Watching Status¶

Alterações nos dataframes¶

anime e anime_with_synopsis¶

watching_status¶

Postgres¶

Conexão com o banco¶

Criação Schema¶

Criação tabela anime¶

Criação tabela watching_status¶

Criação tabela anime_list¶

Perguntas¶

1) Quantidade de animes com pontuação geral acima de 7 por tipo.¶

2) TOP 10 Animes mais populares que possuam a palavra "demon" na synopsis.¶

3) TOP 10 animes com maior score e mais de 13 episódios que são do gênero Mystery e Supernatural.¶

4) TOP 10 animes estreados entre 2011 e 2017 mais assistidos que começam com a letra 'D' e são do gênero terror ou ficção científica.¶

5) Realize uma query que retorne todos os animes do estúdio "Studio Pierrot" que possuam episodios com duração de 21 ou 23 minutos estreados na temporada de outono ou primavera¶

6) Realize uma query que retorne os usuários que estão atualmente assistindo Claymore e que já tenham assistindo pelo menos metade da quantidade de episódios.¶

7) Calcule a média das avaliações dos usuários para os animes que foram assistidos completamente agrupados por sua fonte de origem.¶

8) Retorne os nomes dos animes que foram dropados com uma média de avaliação dos usuários inferior a 5 e nos quais os espectadores assistiram apenas um episódio ou nenhum.¶

9) TOP 10 animes baseados em mangás de gênero Shoujo estreados entre 2015 e 2019 que possuam a maior quantidade de avaliações nota 10.¶

10) TOP 10 filmes do gênero drama exibidos em 2016¶

	count	unique	top	freq
Name	17562	17558	Maou Gakuin no Futekigousha: Shijou Saikyou no Maou no Shiso, Tensei shite Shison-tachi no Gakkou e	3
Score	17562	533	Unknown	5141
Genres	17562	5034	Hentai	969
English name	17562	6831	Unknown	10565
Japanese name	17562	16679	Unknown	48
Type	17562	7	TV	4996
Episodes	17562	201	1	8381
Aired	17562	11947	Unknown	309
Premiered	17562	231	Unknown	12817
Producers	17562	3783	Unknown	7794
Licensors	17562	231	Unknown	13616
Studios	17562	1090	Unknown	7079
Source	17562	16	Original	5215
Duration	17562	313	24 min. per ep.	1723
Rating	17562	7	PG-13 - Teens 13 or older	6132
Ranked	17562	10490	Unknown	1762
Score-10	17562	3379	4.0	936
Score-9	17562	3645	Unknown	3167
Score-8	17562	4515	Unknown	1371
Score-7	17562	4933	2.0	610
Score-6	17562	4236	Unknown	511
Score-5	17562	3288	Unknown	584
Score-4	17562	2235	Unknown	977
Score-3	17562	1506	Unknown	1307
Score-2	17562	1110	Unknown	1597
Score-1	17562	1084	4.0	955

	Nome	Qtd_valores_unicos	valores_unicos
0	Name	17558	Cowboy Bebop;Cowboy Bebop: Tengoku no Tobira;Trigun;Witch Hunter Robin;Bouken Ou Beet;Eyeshield ...
1	Premiered	231	Spring 1998;Unknown;Summer 2002;Fall 2004;Spring 2005;Fall 2002;Spring 2004;Fall 1999;Fall 2001;...
2	Source	16	Original;Manga;Light novel;Game;Visual novel;4-koma manga;Novel;Unknown;Other;Picture book;Web m...
3	Type	7	TV;Movie;OVA;Special;ONA;Music;Unknown
4	Duration	313	24 min. per ep.;1 hr. 55 min.;25 min. per ep.;23 min. per ep.;27 min. per ep.;24 min.;22 min. pe...
5	Studios	1090	Sunrise;Bones;Madhouse;Toei Animation;Gallop;J.C.Staff;Nippon Animation;A.C.G.T.;Studio Pierrot;...

	count	unique	top	freq
Name	16214	16210	Maou Gakuin no Futekigousha: Shijou Saikyou no Maou no Shiso, Tensei shite Shison-tachi no Gakkou e	3
Score	16214	532	Unknown	5123
Genres	16214	4857	Music	790
sypnopsis	16214	15221	No synopsis information has been added to this title. Help improve our database by adding a syno...	717

	rating	watched_episodes
count	6094515.00	6094515.00
mean	4.30	12.31
std	3.91	172.42
min	0.00	0.00
25%	0.00	0.00
50%	6.00	3.00
75%	8.00	12.00
max	10.00	65535.00

	status	description
0	1	Currently Watching
1	2	Completed
2	3	On Hold
3	4	Dropped
4	6	Plan to Watch

	name	score	episodes	popularity	synopsis
0	Naruto	7.91	220	8	oments prior to Naruto Uzumaki's birth, a huge demon known as the Kyuubi, the Nine-Tailed Fox, a...
1	Kimetsu no Yaiba	8.62	26	24	Ever since the death of his father, the burden of supporting the family has fallen upon Tanjirou...
2	Ao no Exorcist	7.59	25	25	Humans and demons are two sides of the same coin, as are Assiah and Gehenna, their respective wo...
3	Ansatsu Kyoushitsu	8.12	22	37	hen a mysterious creature chops the moon down to a permanent crescent, the students of class 3-E...
4	Bakemonogatari	8.36	15	61	Koyomi Araragi, a third-year high school student, manages to survive a vampire attack with the h...
5	Hataraku Maou-sama!	7.83	13	62	Striking fear into the hearts of mortals, the Demon Lord Satan begins to conquer the land of Ent...
6	Mononoke Hime	8.72	1	91	hen an Emishi village is attacked by a fierce demon boar, the young prince Ashitaka puts his lif...
7	Kuroshitsuji	7.75	24	92	Young Ciel Phantomhive is known as "the Queen's Guard Dog," taking care of the many unsettling e...
8	Howl no Ugoku Shiro	8.67	1	98	That jumbled piece of architecture, that cacophony of hissing steam and creaking joints, with sm...
9	Fate/Zero 2nd Season	8.59	12	105	s the Fourth Holy Grail War rages on with no clear victor in sight, the remaining Servants and t...

	name	score	genres	episodes
0	Monogatari Series: Second Season	8.78	Mystery, Comedy, Supernatural, Romance, Vampire	26
1	Mushishi	8.69	Adventure, Slice of Life, Mystery, Historical, Supernatural, Fantasy, Seinen	26
2	Death Note	8.63	Mystery, Police, Psychological, Supernatural, Thriller, Shounen	37
3	Mo Dao Zu Shi	8.50	Action, Adventure, Mystery, Historical, Supernatural	15
4	Bakemonogatari	8.36	Romance, Supernatural, Mystery, Vampire	15
5	Shinsekai yori	8.35	Drama, Horror, Mystery, Psychological, Sci-Fi, Supernatural	25
6	Higurashi no Naku Koro ni Kai	8.23	Mystery, Psychological, Supernatural, Thriller	24
7	Durarara!!	8.18	Action, Mystery, Supernatural	24
8	Gankutsuou	8.17	Drama, Mystery, Sci-Fi, Supernatural, Thriller	24
9	xxxHOLiC	8.01	Comedy, Drama, Mystery, Psychological, Supernatural	24

	anime_id	name	genres	premiered	description	watched
0	6880	Deadman Wonderland	Action, Horror, Sci-Fi, Shounen, Supernatural	Spring 2011	Completed	5234
1	15583	Date A Live	Sci-Fi, Harem, Comedy, Romance, Mecha, School	Spring 2013	Completed	4351
2	16592	Danganronpa: Kibou no Gakuen to Zetsubou no Koukousei The Animation	Mystery, Horror, Psychological, School	Summer 2013	Completed	3972
3	19163	Date A Live II	Comedy, Harem, Mecha, Romance, School, Sci-Fi	Spring 2014	Completed	2980
4	9379	Denpa Onna to Seishun Otoko	Sci-Fi, Slice of Life, Comedy	Spring 2011	Completed	1833
5	31163	Dimension W	Action, Sci-Fi, Seinen	Winter 2016	Completed	1483
6	32189	Danganronpa 3: The End of Kibougamine Gakuen - Mirai-hen	Action, Mystery, Horror, Psychological	Summer 2016	Completed	1288
7	33028	Danganronpa 3: The End of Kibougamine Gakuen - Zetsubou-hen	Action, Mystery, Horror, Psychological, School	Summer 2016	Completed	1199
8	23333	DRAMAtical Murder	Action, Sci-Fi, Super Power, Psychological	Summer 2014	Completed	698
9	31710	Divine Gate	Action, Sci-Fi, Fantasy	Winter 2016	Completed	692

	name	score	studios	duration	premiered
0	Naruto	7.91	Studio Pierrot	23 min. per ep.	Fall 2002
1	Ayashi no Ceres	7.23	Studio Pierrot	21 min. per ep.	Spring 2000
2	Fushigi Yuugi	7.64	Studio Pierrot	23 min. per ep.	Spring 1995
3	Gensoumaden Saiyuuki	7.59	Studio Pierrot	23 min. per ep.	Spring 2000
4	Saiyuuki Reload	7.41	Studio Pierrot	23 min. per ep.	Fall 2003
5	Hikaru no Go	8.11	Studio Pierrot	23 min. per ep.	Fall 2001
6	E's Otherwise	6.64	Studio Pierrot	23 min. per ep.	Spring 2003
7	Tenshi ni Narumon!	6.71	Studio Pierrot	23 min. per ep.	Spring 1999
8	Kaze no Youjinbou	7.19	Studio Pierrot	23 min. per ep.	Fall 2001
9	Ganbare! Kickers	6.82	Studio Pierrot	23 min. per ep.	Fall 1986
10	Mahou no Idol Pastel Yumi	6.29	Studio Pierrot	23 min. per ep.	Spring 1986
11	Osomatsu-san	8.02	Studio Pierrot	23 min. per ep.	Fall 2015
12	Black Clover	7.38	Studio Pierrot	23 min. per ep.	Fall 2017
13	Osomatsu-san 2nd Season	7.60	Studio Pierrot	23 min. per ep.	Fall 2017
14	Akudama Drive	7.75	Studio Pierrot	23 min. per ep.	Fall 2020
15	Osomatsu-san 3rd Season	7.42	Studio Pierrot	23 min. per ep.	Fall 2020

	user_id	watched_episodes	description	name	episodes	progresso
0	15339	13	Currently Watching	Claymore	26	50%
1	4948	13	Currently Watching	Claymore	26	50%
2	7505	13	Currently Watching	Claymore	26	50%
3	10227	13	Currently Watching	Claymore	26	50%
4	16682	14	Currently Watching	Claymore	26	53%
5	14196	15	Currently Watching	Claymore	26	57%
6	10008	16	Currently Watching	Claymore	26	61%
7	19355	16	Currently Watching	Claymore	26	61%
8	3887	16	Currently Watching	Claymore	26	61%
9	15768	16	Currently Watching	Claymore	26	61%
10	16609	17	Currently Watching	Claymore	26	65%
11	4597	17	Currently Watching	Claymore	26	65%
12	9767	17	Currently Watching	Claymore	26	65%
13	849	18	Currently Watching	Claymore	26	69%
14	5827	18	Currently Watching	Claymore	26	69%
15	13046	18	Currently Watching	Claymore	26	69%
16	16461	19	Currently Watching	Claymore	26	73%
17	18167	20	Currently Watching	Claymore	26	76%
18	5050	20	Currently Watching	Claymore	26	76%
19	18080	20	Currently Watching	Claymore	26	76%
20	10581	21	Currently Watching	Claymore	26	80%
21	8812	22	Currently Watching	Claymore	26	84%
22	1593	22	Currently Watching	Claymore	26	84%
23	6956	23	Currently Watching	Claymore	26	88%
24	1801	25	Currently Watching	Claymore	26	96%
25	2643	26	Currently Watching	Claymore	26	100%
26	15533	26	Currently Watching	Claymore	26	100%

	type	qtd_animes
0	TV	1731
1	Movie	732
2	OVA	592
3	Special	430
4	ONA	190
5	Music	69

	source	rating_avg
0	Novel	6.740472
1	Web manga	6.637504
2	Manga	6.514068
3	Original	6.466376
4	Light novel	6.445040
5	4-koma manga	6.330569
6	Visual novel	6.130764
7	Card game	6.064315
8	Music	5.840135
9	Other	5.838581
10	Book	5.810191
11	Game	5.704494
12	Picture book	5.531469
13	Digital manga	5.193768
14	Unknown	5.118630
15	Radio	4.828877

	anime_id	name	score	rating_avg	qt_dropped
0	269	Bleach	7.80	2.597701	261
1	21	One Piece	8.52	2.745020	251
2	1735	Naruto: Shippuuden	8.16	3.092827	237
3	6702	Fairy Tail	7.68	2.221198	217
4	20	Naruto	7.91	2.540107	187
...	...	...	...	...	...
8590	30095	Qi Cai Lao Fu Zi	-1.00	0.000000	1
8591	30096	Lao Fu Zi Shui Hu Chuan	-1.00	0.000000	1
8592	30097	Shan T Lao Fu Zi	-1.00	0.000000	1
8593	30098	Lao Fu Zi Fan Dou Zhen Tan	-1.00	0.000000	1
8594	30099	Lao Fu Zi Zhi Xiao Shui Hu Chuan Qi	-1.00	0.000000	1

	anime_id	name	score	genres	type	aired
0	28851	Koe no Katachi	9.00	Drama, School, Shounen	Movie	Sep 17, 2016
1	32281	Kimi no Na wa.	8.96	Romance, Supernatural, School, Drama	Movie	Aug 26, 2016
2	15227	Kono Sekai no Katasumi ni	8.24	Historical, Drama, Seinen	Movie	Nov 12, 2016
3	29831	Tamayura: Sotsugyou Shashin Part 4 - Ashita	8.10	Comedy, Drama, Slice of Life	Movie	Apr 2, 2016
4	31490	One Piece Film: Gold	7.95	Action, Adventure, Comedy, Drama, Fantasy, Shounen	Movie	Jul 23, 2016
5	33221	Mobile Suit Gundam Thunderbolt: December Sky	7.91	Action, Military, Sci-Fi, Space, Drama, Mecha	Movie	Jun 25, 2016
6	33190	Planetarian: Hoshi no Hito	7.86	Sci-Fi, Drama	Movie	Sep 3, 2016
7	32551	Digimon Adventure tri. 3: Kokuhaku	7.71	Action, Adventure, Comedy, Drama	Movie	Sep 24, 2016
8	10259	Da Yu Hai Tang (Movie)	7.58	Adventure, Supernatural, Drama, Romance	Movie	Jul 8, 2016
9	31418	KanColle Movie	7.53	Action, Military, Sci-Fi, Drama	Movie	Nov 26, 2016

	anime_id	score_10	name	genres	premiered
0	28297	325	Ore Monogatari!!	Comedy, Romance, Shoujo	Spring 2015
1	32729	229	Orange	Sci-Fi, Drama, Romance, School, Shoujo	Summer 2016
2	36649	226	Banana Fish	Action, Adventure, Drama, Shoujo	Summer 2018
3	25681	206	Kamisama Hajimemashita◎	Comedy, Demons, Supernatural, Romance, Fantasy, Shoujo	Winter 2015
4	30123	197	Akagami no Shirayuki-hime	Fantasy, Romance, Drama, Shoujo	Summer 2015
5	38680	197	Fruits Basket 1st Season	Comedy, Drama, Romance, Shoujo, Slice of Life, Supernatural	Spring 2019
6	32983	189	Natsume Yuujinchou Go	Slice of Life, Demons, Supernatural, Drama, Shoujo	Fall 2016
7	34591	179	Natsume Yuujinchou Roku	Slice of Life, Demons, Supernatural, Drama, Shoujo	Spring 2017
8	31173	142	Akagami no Shirayuki-hime 2nd Season	Drama, Romance, Fantasy, Shoujo	Winter 2016
9	37786	105	Yagate Kimi ni Naru	Romance, School, Shoujo Ai	Fall 2018