Commit

fix(version): linter online_data to bump version (#128)
luabida authored Apr 20, 2023
1 parent d449de4 commit eddb93c
Showing 14 changed files with 395 additions and 393 deletions.
2 changes: 1 addition & 1 deletion pysus/online_data/CIHA.py
@@ -23,7 +23,7 @@ def download(
:param states: 2 letter state code,
:param years: 4 digit integer
"""
return FTP_Downloader("CIHA").download(
return FTP_Downloader('CIHA').download(
UFs=states,
years=years,
months=months,
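
For context, a minimal usage sketch of the CIHA module touched above (not part of the commit): it assumes the download(states, years, months) parameters shown in the hunk, and the values below are illustrative only.

from pysus.online_data.CIHA import download

# Illustrative call: one state, one year, one month. The exact return value
# (DataFrame or path to cached files) depends on the pysus version in use.
ciha = download(states='SP', years=2021, months=1)
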
28 changes: 14 additions & 14 deletions pysus/online_data/CNES.py
@@ -3,19 +3,19 @@
from pysus.online_data import CACHEPATH, FTP_Downloader

group_dict = {
"LT": ["Leitos - A partir de Out/2005", 10, 2005],
"ST": ["Estabelecimentos - A partir de Ago/2005", 8, 2005],
"DC": ["Dados Complementares - A partir de Ago/2005", 8, 2005],
"EQ": ["Equipamentos - A partir de Ago/2005", 8, 2005],
"SR": ["Serviço Especializado - A partir de Ago/2005", 8, 2005],
"HB": ["Habilitação - A partir de Mar/2007", 3, 2007],
"PF": ["Profissional - A partir de Ago/2005", 8, 2005],
"EP": ["Equipes - A partir de Abr/2007", 5, 2007],
"IN": ["Incentivos - A partir de Nov/2007", 11, 2007],
"RC": ["Regra Contratual - A partir de Mar/2007", 3, 2007],
"EE": ["Estabelecimento de Ensino - A partir de Mar/2007", 3, 2007],
"EF": ["Estabelecimento Filantrópico - A partir de Mar/2007", 3, 2007],
"GM": ["Gestão e Metas - A partir de Jun/2007", 6, 2007],
'LT': ['Leitos - A partir de Out/2005', 10, 2005],
'ST': ['Estabelecimentos - A partir de Ago/2005', 8, 2005],
'DC': ['Dados Complementares - A partir de Ago/2005', 8, 2005],
'EQ': ['Equipamentos - A partir de Ago/2005', 8, 2005],
'SR': ['Serviço Especializado - A partir de Ago/2005', 8, 2005],
'HB': ['Habilitação - A partir de Mar/2007', 3, 2007],
'PF': ['Profissional - A partir de Ago/2005', 8, 2005],
'EP': ['Equipes - A partir de Abr/2007', 5, 2007],
'IN': ['Incentivos - A partir de Nov/2007', 11, 2007],
'RC': ['Regra Contratual - A partir de Mar/2007', 3, 2007],
'EE': ['Estabelecimento de Ensino - A partir de Mar/2007', 3, 2007],
'EF': ['Estabelecimento Filantrópico - A partir de Mar/2007', 3, 2007],
'GM': ['Gestão e Metas - A partir de Jun/2007', 6, 2007],
}


@@ -47,7 +47,7 @@ def download(
:param states: 2 letter state code, can be a list of UFs
:param years: 4 digit integer, can be a list of years
"""
return FTP_Downloader("CNES").download(
return FTP_Downloader('CNES').download(
CNES_group=group,
UFs=states,
years=years,
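
A minimal usage sketch for the CNES module (not part of the commit), assuming the download(group, states, years) parameters implied by the hunk; group codes come from group_dict above and the values are illustrative.

from pysus.online_data.CNES import download

# Illustrative call: 'LT' (Leitos) is one of the group codes in group_dict
# and, per that table, is only available from Oct/2005 onwards.
beds = download(group='LT', states='MG', years=2019)
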
48 changes: 24 additions & 24 deletions pysus/online_data/ESUS.py
@@ -17,79 +17,79 @@ def download(uf, cache=True, checkmemory=True):
other an iterator of chunks of size 1000.
"""
uf = uf.lower()
user = "user-public-notificacoes"
pwd = "Za4qNXdyQNSa9YaA"
user = 'user-public-notificacoes'
pwd = 'Za4qNXdyQNSa9YaA'
today = date.today()
dt = today.strftime("_%d_%m_%Y")
base = f"desc-esus-notifica-estado-{uf}" # desc-notificacoes-esusve-
url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br"
out = f"ESUS_{uf}_{dt}.parquet"
dt = today.strftime('_%d_%m_%Y')
base = f'desc-esus-notifica-estado-{uf}' # desc-notificacoes-esusve-
url = f'https://{user}:{pwd}@elasticsearch-saps.saude.gov.br'
out = f'ESUS_{uf}_{dt}.parquet'

cachefile = os.path.join(CACHEPATH, out)
tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{uf.upper()}.csv.gz")
tempfile = os.path.join(CACHEPATH, f'ESUS_temp_{uf.upper()}.csv.gz')
if os.path.exists(cachefile):
logger.info(f"Local parquet file found at {cachefile}")
logger.info(f'Local parquet file found at {cachefile}')
df = pd.read_parquet(cachefile)
elif os.path.exists(tempfile):
logger.info(f"Local csv file found at {tempfile}")
logger.info(f'Local csv file found at {tempfile}')
df = pd.read_csv(tempfile, chunksize=1000)
else:
fname = fetch(base, uf, url)
size = os.stat(fname).st_size
if size > 50e6 and checkmemory:
print(f"Downloaded data is to large:{size / 1e6} MB compressed.")
print(f'Downloaded data is to large:{size / 1e6} MB compressed.')
print(
"Only loading the first 1000 rows. If your computer has enough"
'Only loading the first 1000 rows. If your computer has enough'
+ " memory, set 'checkmemory' to False"
)
print(f"The full data is in {fname}")
print(f'The full data is in {fname}')
df = pd.read_csv(fname, chunksize=1000)
else:
df = pd.read_csv(fname, low_memory=False)
print(f"{df.shape[0]} records downloaded.")
print(f'{df.shape[0]} records downloaded.')
os.unlink(fname)
if cache:
df.to_parquet(cachefile)
logger.info(f"Data stored as parquet at {cachefile}")
logger.info(f'Data stored as parquet at {cachefile}')

return df


def fetch(base, uf, url):
UF = uf.upper()
print(f"Reading ESUS data for {UF}")
es = Elasticsearch([url], send_get_body_as="POST")
body = {"query": {"match_all": {}}}
print(f'Reading ESUS data for {UF}')
es = Elasticsearch([url], send_get_body_as='POST')
body = {'query': {'match_all': {}}}
results = helpers.scan(es, query=body, index=base)
# df = pd.DataFrame.from_dict(
# [document['_source'] for document in results]
# )

chunker = chunky_fetch(results, 3000)
h = 1
tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{UF}.csv.gz")
tempfile = os.path.join(CACHEPATH, f'ESUS_temp_{UF}.csv.gz')
for ch in chunker:
df = pd.DataFrame.from_dict(ch)
df.sintomas = df["sintomas"].str.replace(
";",
"",
df.sintomas = df['sintomas'].str.replace(
';',
'',
) # remove os ;
if h:
df.to_csv(tempfile)
h = 0
else:
df.to_csv(tempfile, mode="a", header=False)
df.to_csv(tempfile, mode='a', header=False)
# df = pd.read_csv('temp.csv.gz')

return tempfile


def chunky_fetch(results, chunk_size=3000):
"Fetches data in chunks to preserve memory"
"""Fetches data in chunks to preserve memory"""
data = []
i = 0
for d in results:
data.append(d["_source"])
data.append(d['_source'])
i += 1
if i == chunk_size:
yield data
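
A minimal usage sketch for the ESUS module (not part of the commit), using the download(uf, cache, checkmemory) signature shown in the hunk header; the state code is illustrative.

from pysus.online_data.ESUS import download

# Illustrative call for a small state. With checkmemory=True a compressed file
# larger than ~50 MB is returned as an iterator of 1000-row chunks instead of
# a single DataFrame; results are cached as parquet under CACHEPATH.
df = download('se', cache=True, checkmemory=True)
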
54 changes: 27 additions & 27 deletions pysus/online_data/IBGE.py
@@ -10,13 +10,13 @@

from urllib.error import HTTPError

APIBASE = "https://servicodados.ibge.gov.br/api/v3/"
APIBASE = 'https://servicodados.ibge.gov.br/api/v3/'


def get_sidra_table(
table_id,
territorial_level,
geocode="all",
geocode='all',
period=None,
variables=None,
classification=None,
@@ -79,31 +79,31 @@ def get_sidra_table(
:param headers: `y` para receber o header (valor default, caso o parâmetro h não seja especificado). `n` para não receber o header.
:return:
"""
base_url = "https://apisidra.ibge.gov.br/values"
query = f"/t/{table_id}/n{territorial_level}/{geocode}"
base_url = 'https://apisidra.ibge.gov.br/values'
query = f'/t/{table_id}/n{territorial_level}/{geocode}'
if period is not None:
query += f"/p/{period}"
query += f'/p/{period}'
if variables is not None:
query += f"/v/{variables}"
query += f'/v/{variables}'
if classification is not None:
query += f"/c{classification}"
query += f'/c{classification}'
if categories is not None:
query += f"/{categories}"
query += f'/{categories}'
if format is not None:
query += f"/f/{format}"
query += f'/f/{format}'
if decimals is not None:
query += f"/d/{decimals}"
query += f'/d/{decimals}'
if headers is not None:
query += f"/h/{headers}"
query += f'/h/{headers}'

url = base_url + query
print(f"Requesting data from {url}")
print(f'Requesting data from {url}')
try:
with (get_legacy_session() as s, s.get(url) as response):
df = pd.DataFrame(response.json())
except HTTPError as exc:
response = requests.get(url)
print(f"Consulta falhou: {response.text}")
print(f'Consulta falhou: {response.text}')
return None
return df

@@ -116,14 +116,14 @@ def list_agregados(**kwargs):
:param kwargs: parâmetros válidos: período, assunto, classificacao, periodicidade,nivel.
:return: Dataframe
"""
url = APIBASE + "agregados?"
url += "&".join([f"{k}={v}" for k, v in kwargs.items()])
print(f"Fetching Data groupings from {url}")
url = APIBASE + 'agregados?'
url += '&'.join([f'{k}={v}' for k, v in kwargs.items()])
print(f'Fetching Data groupings from {url}')
try:
with (get_legacy_session() as s, s.get(url) as response):
table = pd.DataFrame(response.json())
except requests.exceptions.SSLError as e:
print(f"Failed fetching aggregates: {e}")
print(f'Failed fetching aggregates: {e}')
return pd.DataFrame()
return table

@@ -136,12 +136,12 @@ def localidades_por_agregado(agregado: int, nivel: str):
delimitados pelo caracter | (pipe). p.ex. N7|N6
:return:
"""
url = APIBASE + f"agregados/{agregado}/localidades/{nivel}"
url = APIBASE + f'agregados/{agregado}/localidades/{nivel}'
try:
with (get_legacy_session() as s, s.get(url) as response):
table = pd.DataFrame(response.json())
except Exception as e:
print(f"Could not download from {url}\n{e}")
print(f'Could not download from {url}\n{e}')
return None
return table

@@ -152,12 +152,12 @@ def metadados(agregado: int):
:param agregado: Identificador do agregado
"""
url = APIBASE + f"agregados/{agregado}/metadados"
url = APIBASE + f'agregados/{agregado}/metadados'
try:
with (get_legacy_session() as s, s.get(url) as response):
data = response.json()
except Exception as e:
print(f"Could not download from {url}\n{e}")
print(f'Could not download from {url}\n{e}')
return None
return data

@@ -168,7 +168,7 @@ def lista_periodos(agregado: int):
:param agregado:
:return: pd.DataFrame com os períodos de atualização
"""
url = APIBASE + f"agregados/{agregado}/periodos"
url = APIBASE + f'agregados/{agregado}/periodos'
try:
with (get_legacy_session() as s, s.get(url) as response):
table = pd.DataFrame(response.json())
@@ -229,19 +229,19 @@ class FetchData:
"""

def __init__(
self, agregado: int, periodos: str, variavel: str = "allxp", **kwargs
self, agregado: int, periodos: str, variavel: str = 'allxp', **kwargs
):
self.url = (
APIBASE
+ f"agregados/{agregado}/periodos/{periodos}/variaveis/{variavel}?"
+ f'agregados/{agregado}/periodos/{periodos}/variaveis/{variavel}?'
)
self.url += "&".join([f"{k}={v}" for k, v in kwargs.items()])
self.url += '&'.join([f'{k}={v}' for k, v in kwargs.items()])
self.JSON = None
self._fetch_JSON()

def _fetch_JSON(self):
try:
print(f"Fetching {self.url}")
print(f'Fetching {self.url}')
with (get_legacy_session() as s, s.get(self.url) as response):
self.JSON = response.json()
except Exception as e:
@@ -285,5 +285,5 @@ def get_legacy_session():
ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
ctx.options |= 0x4 # OP_LEGACY_SERVER_CONNECT
session = requests.session()
session.mount("https://", CustomHttpAdapter(ctx))
session.mount('https://', CustomHttpAdapter(ctx))
return session
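
A usage sketch for the SIDRA helpers edited above (not part of the commit). Table 1419 and the other arguments are illustrative; with these values the query-building code in get_sidra_table produces a URL of the form https://apisidra.ibge.gov.br/values/t/1419/n1/all/p/last.

from pysus.online_data.IBGE import get_sidra_table, list_agregados, metadados

# Illustrative calls; all three helpers use get_legacy_session() internally.
table = get_sidra_table(table_id=1419, territorial_level=1, geocode='all', period='last')
groupings = list_agregados()   # DataFrame of available aggregates, empty on SSL failure
meta = metadados(1419)         # metadata for one aggregate, or None if the request fails
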
52 changes: 26 additions & 26 deletions pysus/online_data/Infodengue.py
@@ -10,15 +10,15 @@
# from loguru import logger

APP_DIR = Path(__file__).resolve(strict=True).parent.parent
CID10 = {"dengue": "A90", "chikungunya": "A92.0", "zika": "A928"}
CID10 = {'dengue': 'A90', 'chikungunya': 'A92.0', 'zika': 'A928'}

with open(APP_DIR / "dataset/geocode_by_cities.json", "r") as f:
with open(APP_DIR / 'dataset/geocode_by_cities.json', 'r') as f:
geocode_by_cities = json.load(f)


def normalize(s):
for p in string.punctuation:
s = s.replace(p, "")
s = s.replace(p, '')

return unidecode.unidecode(s.lower().strip())

@@ -39,7 +39,7 @@ def search_string(substr: str) -> Dict[str, int]:

matching_cities = [
get_close_matches(i, normalized_list, n=55)
for i in normalize(substr).split(".")
for i in normalize(substr).split('.')
]

return {
@@ -54,7 +54,7 @@ def download(
eyw_start: int,
eyw_end: int,
city_name: str,
format="csv",
format='csv',
) -> pd.DataFrame:
"""
Download InfoDengue API data by municipality and disease
@@ -77,38 +77,38 @@

if disease not in CID10.keys():
raise Exception(
f"The diseases available are: {[k for k in CID10.keys()]}"
f'The diseases available are: {[k for k in CID10.keys()]}'
)
elif len(str(eyw_start)) != 6 or len(str(eyw_end)) != 6:
raise Exception(
"The epidemiological week must contain 6 digits, "
"started in the year 2010 until 2022. Example: 202248"
'The epidemiological week must contain 6 digits, '
'started in the year 2010 until 2022. Example: 202248'
)
elif geocode is None:
list_of_cities = search_string(city_name)
print(f"You must choose one of these city names: {list_of_cities}")
print(f'You must choose one of these city names: {list_of_cities}')
else:
s_yw = str(eyw_start)
e_yw = str(eyw_end)
ew_start, ey_start = s_yw[-2:], s_yw[:4]
ew_end, ey_end = e_yw[-2:], e_yw[:4]
url = "https://info.dengue.mat.br/api/alertcity"
url = 'https://info.dengue.mat.br/api/alertcity'
params = (
"&disease="
+ f"{disease}"
+ "&geocode="
+ f"{geocode}"
+ "&format="
+ f"{format}"
+ "&ew_start="
+ f"{ew_start}"
+ "&ew_end="
+ f"{ew_end}"
+ "&ey_start="
+ f"{ey_start}"
+ "&ey_end="
+ f"{ey_end}"
'&disease='
+ f'{disease}'
+ '&geocode='
+ f'{geocode}'
+ '&format='
+ f'{format}'
+ '&ew_start='
+ f'{ew_start}'
+ '&ew_end='
+ f'{ew_end}'
+ '&ey_start='
+ f'{ey_start}'
+ '&ey_end='
+ f'{ey_end}'
)

url_resp = "?".join([url, params])
return pd.read_csv(url_resp, index_col="SE").T
url_resp = '?'.join([url, params])
return pd.read_csv(url_resp, index_col='SE').T
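
A usage sketch for the Infodengue module (not part of the commit), assuming the download(disease, eyw_start, eyw_end, city_name) argument order suggested by the hunks; the weeks and city below are illustrative.

from pysus.online_data.Infodengue import download, search_string

# Illustrative calls: search_string helps locate a valid city name/geocode,
# and download fetches weekly alert data between two epidemiological weeks.
search_string('Rio de Janeiro')
df = download('dengue', 202101, 202152, 'Rio de Janeiro')
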
