Source code for pysus.api.dadosgov.databases
"""Pre-configured health database definitions accessible via dados.gov.br."""
import re
from typing import Any
from pysus.utils import zfill_year
from .models import Dataset
# Three-letter Portuguese month abbreviations mapped to their month number
# (1-12), in calendar order.
MONTHS: dict[str, int] = {
    abbreviation: number
    for number, abbreviation in enumerate(
        (
            "jan",
            "fev",
            "mar",
            "abr",
            "mai",
            "jun",
            "jul",
            "ago",
            "set",
            "out",
            "nov",
            "dez",
        ),
        start=1,
    )
}
def _parse_year(val: str) -> int | None:
"""Parse a year string into an integer within the valid range."""
try:
y = int(val)
return y if 1970 <= y <= 2100 else None
except ValueError:
return None
def _skip(name: str) -> bool:
"""Check whether a filename should be skipped by naming conventions."""
return name.startswith("get_") or name.lower().endswith(".pdf")
[docs]
class CNES(Dataset):
    """Cadastro Nacional de Estabelecimentos de Saúde (CNES).

    Dataset definition for the national registry of health facilities.
    """

    # Identifiers associated with this dataset.
    # NOTE(review): presumably dados.gov.br dataset ids -- confirm against
    # how ``Dataset`` consumes ``ids``.
    ids: list[str] = [
        "40a0d093-b12f-44a4-bdc7-bae8eb54dd04",
        "9455b341-b06e-408e-8e10-54b32b3d74ec",
    ]

    @property
    def name(self) -> str:
        """Return the short name.

        Returns
        -------
        str
            The abbreviated dataset name ``"CNES"``.
        """
        return "CNES"

    @property
    def long_name(self) -> str:
        """Return the human-readable name.

        Returns
        -------
        str
            The full Portuguese name of the dataset.
        """
        return "Cadastro Nacional de Estabelecimentos de Saúde"

    @property
    def description(self) -> str:
        """Return a description of the dataset.

        Returns
        -------
        str
            A Portuguese description of the CNES information system.
        """
        return (
            "O Cadastro Nacional de Estabelecimentos de Saúde (CNES) é o "
            "sistema de informação oficial de cadastramento de informações "
            "de todos os estabelecimentos de saúde no país."
        )

    def formatter(self, filename: str) -> dict[str, Any]:
        """Parse a CNES filename and extract its month and year.

        Recognised names end in ``_MM-YYYY.csv`` (two-digit month, four-digit
        year). The match is case-sensitive.

        Parameters
        ----------
        filename : str
            The name of the file to parse. Surrounding whitespace is
            ignored.

        Returns
        -------
        dict[str, Any]
            A dictionary with keys ``state``, ``year``, and ``month``.
            ``state`` is always ``None``. ``year`` is ``None`` when it is
            outside the range accepted by ``_parse_year`` and ``month`` is
            ``None`` when it is not between 1 and 12. Skipped or
            unrecognised files return ``None`` for all keys.
        """
        empty: dict[str, Any] = {"state": None, "year": None, "month": None}
        try:
            name = filename.strip()
            if _skip(name):
                return empty
            m = re.search(r"_(\d{2})-(\d{4})\.csv$", name)
            if m is None:
                return empty
            month = int(m.group(1))
            return {
                "state": None,
                "year": _parse_year(m.group(2)),
                # Two digits can encode values such as 00 or 13; report
                # those as unknown instead of an impossible month number.
                "month": month if 1 <= month <= 12 else None,
            }
        except (IndexError, ValueError):
            return empty
[docs]
class PNI(Dataset):
    """Programa Nacional de Imunizações (PNI).

    Dataset definition for the national immunisation programme data.
    """

    # Identifiers associated with this dataset.
    # NOTE(review): presumably dados.gov.br dataset ids -- confirm against
    # how ``Dataset`` consumes ``ids``.
    ids: list[str] = [
        "2989d396-cb09-47e7-a3b8-a4b951ca0200",
        "543aa08a-46c4-44e8-802e-198daa30753d",
        "04292d08-ee4f-463a-b7b5-76cfb76775b3",
        "7ed6eecc-c254-475c-92c5-daba5727596b",
        "783b7456-6a6c-4025-a8bd-8e9caa0fb962",
        "c6c3c6f3-2026-48a2-84ac-d8039714a0ba",
        "9a25b796-80e3-444a-a4e7-405f5596d8ab",
    ]

    # Common stem shared by every alias key below.
    _PNI_PREFIX = "doses-aplicadas-pelo-programa-de-nacional-de-imunizacoes-pni"

    # Every listed name maps to the same group code, ``"DPNI"``. The 2022
    # entry uses a different naming shape from the other years.
    group_aliases: dict[str, str] = dict.fromkeys(
        (
            _PNI_PREFIX,
            f"{_PNI_PREFIX}-2020",
            f"{_PNI_PREFIX}-2021",
            f"dataset-{_PNI_PREFIX}_2022",
            f"{_PNI_PREFIX}-2023",
            f"{_PNI_PREFIX}-2025",
            f"{_PNI_PREFIX}-2026",
        ),
        "DPNI",
    )

    @property
    def name(self) -> str:
        """Short identifier of the dataset.

        Returns
        -------
        str
            Always ``"PNI"``.
        """
        return "PNI"

    @property
    def long_name(self) -> str:
        """Full name of the dataset.

        Returns
        -------
        str
            The dataset's name in Portuguese.
        """
        return "Programa Nacional de Imunizações"

    @property
    def description(self) -> str:
        """One-sentence summary of the dataset.

        Returns
        -------
        str
            A Portuguese sentence describing what the PNI monitors.
        """
        return "O PNI monitora a cobertura vacinal e doses aplicadas no Brasil."

    def formatter(self, filename: str) -> dict[str, Any]:
        """Extract month and year from a PNI vaccination filename.

        The name is stripped and lower-cased, then matched from its start
        against ``vacinacao_<mon>_<yyyy>_csv.zip``, where ``<mon>`` is looked
        up in ``MONTHS``.

        Parameters
        ----------
        filename : str
            The name of the file to parse.

        Returns
        -------
        dict[str, Any]
            A dictionary with keys ``state``, ``year``, and ``month``.
            ``state`` is always ``None``; ``month`` is ``None`` when the
            abbreviation is not in ``MONTHS``. Skipped or unrecognised
            files return ``None`` for all keys.
        """
        unknown: dict[str, Any] = {"state": None, "year": None, "month": None}
        try:
            lowered = filename.strip().lower()
            if _skip(lowered):
                return unknown
            found = re.match(r"vacinacao_(\w{3})_(\d{4})_csv\.zip", lowered)
            if found is None:
                return unknown
            month_abbr, year_text = found.groups()
            return {
                "state": None,
                "year": _parse_year(year_text),
                "month": MONTHS.get(month_abbr),
            }
        except (IndexError, ValueError):
            return unknown
[docs]
class SIA(Dataset):
    """Sistema de Informações Ambulatoriais (SIA).

    Dataset definition for the outpatient information system.
    """

    # Identifiers associated with this dataset.
    # NOTE(review): presumably dados.gov.br dataset ids -- confirm against
    # how ``Dataset`` consumes ``ids``.
    ids: list[str] = [
        "9a335cb7-2b4f-4fce-8947-e8441b4a90af",
    ]

    @property
    def name(self) -> str:
        """Return the short name.

        Returns
        -------
        str
            The abbreviated dataset name ``"SIA"``.
        """
        return "SIA"

    @property
    def long_name(self) -> str:
        """Return the human-readable name.

        Returns
        -------
        str
            The full Portuguese name of the dataset.
        """
        return "Sistema de Informações Ambulatoriais"

    @property
    def description(self) -> str:
        """Return a description of the dataset.

        Returns
        -------
        str
            A Portuguese description of the SIA outpatient information
            system.
        """
        # The line breaks surrounding the sentence are part of the returned
        # text, so the literal is kept exactly as written.
        return """
O SIA acompanha as ações de saúde produzidas no âmbito ambulatorial.
"""

    def formatter(self, filename: str) -> dict[str, Any]:
        """Parse an SIA filename into year.

        The name is stripped and lower-cased; names ending in
        ``_<yyyy>_.csv`` are recognised.

        Parameters
        ----------
        filename : str
            The name of the file to parse.

        Returns
        -------
        dict[str, Any]
            A dictionary with keys ``state``, ``year``, and ``month``.
            Only ``year`` is ever populated. Skipped or unrecognised files
            return ``None`` for all keys.
        """
        empty: dict[str, Any] = {"state": None, "year": None, "month": None}
        try:
            name = filename.strip().lower()
            if _skip(name):
                return empty
            # This suffix pattern also covers names of the form
            # ``..._<mon>-out_<yyyy>_.csv``: they end with the same
            # ``_<yyyy>_.csv`` tail and yield the same year, so a separate
            # pattern for them could never be reached.
            m = re.search(r"_(\d{4})_\.csv$", name)
            if m is None:
                return empty
            return {
                "state": None,
                "year": _parse_year(m.group(1)),
                "month": None,
            }
        except (IndexError, ValueError):
            return empty
[docs]
class SINAN(Dataset):
    """Sistema de Informação de Agravos de Notificação (SINAN).

    Dataset definition for the notifiable diseases information system.
    """

    # Identifiers associated with this dataset.
    # NOTE(review): presumably dados.gov.br dataset ids -- confirm against
    # how ``Dataset`` consumes ``ids``.
    ids: list[str] = [
        "4d5e5d44-58a8-4d67-b8aa-4ef1e4b00a1c",
        "5699abe0-0510-4da8-b47d-209b3bb32b34",
        "4557ba96-7d52-4a56-bd6f-f99a5af09f77",
        "740ce8f4-7a5d-4351-aad4-7623f2490ada",
        "cf044c1b-b966-4d0e-bab0-f3aa65897b7d",
        "2d4997fb-cd11-4ce2-b217-09cd50e3151f",
        "8a585222-4c2e-43b7-807d-59355ee79c48",
        "527e8665-de64-4f81-b7c3-40b59c7d1d3c",
    ]

    # Maps long group names to four-letter group codes.
    group_aliases: dict[str, str] = {
        "arboviroses-dengue": "DENG",
        "arboviroses-febre-de-chikungunya": "CHIK",
        "arboviroses-zika-virus": "ZIKA",
        "hanseniase": "HANS",
        "dados-tuberculose": "TUBE",
        "sifilis": "SIFA",
    }

    @property
    def name(self) -> str:
        """Return the short name.

        Returns
        -------
        str
            The abbreviated dataset name ``"SINAN"``.
        """
        return "SINAN"

    @property
    def long_name(self) -> str:
        """Return the human-readable name.

        Returns
        -------
        str
            The full Portuguese name of the dataset.
        """
        return "Sistema de Informação de Agravos de Notificação"

    @property
    def description(self) -> str:
        """Return a description of the dataset.

        Returns
        -------
        str
            A Portuguese description of the SINAN notifiable diseases
            system.
        """
        # The line breaks inside and around the sentence are part of the
        # returned text, so the literal is kept exactly as written.
        return """
O SINAN é alimentado pela notificação de doenças de notificação
compulsória
"""

    def formatter(self, filename: str) -> dict[str, Any]:
        """Parse a SINAN filename into state and year.

        Two name shapes are recognised, both matched case-insensitively from
        the start of the name: four word characters followed by ``BR`` and a
        two-digit year (``....BRyy.csv.zip``), and
        ``MPX_<yyyy>_opendatasus.csv.zip``.

        Parameters
        ----------
        filename : str
            The name of the file to parse.

        Returns
        -------
        dict[str, Any]
            A dictionary with keys ``state``, ``year``, and ``month``.
            ``month`` is always ``None``; ``state`` is ``"BR"`` for the first
            name shape and ``None`` otherwise. Skipped or unrecognised files
            return ``None`` for all keys.
        """
        empty: dict[str, Any] = {"state": None, "year": None, "month": None}
        try:
            stripped = filename.strip()
            # Run the skip check before upper-casing: ``_skip`` looks for a
            # lower-case ``get_`` prefix, which can never be present once
            # the name has been upper-cased.
            if _skip(stripped):
                return empty
            name = stripped.upper()
            m = re.match(r"(\w{4})(BR)(\d{2})\.CSV\.ZIP", name)
            if m:
                return {
                    "state": m.group(2),
                    # NOTE(review): zfill_year presumably expands the
                    # two-digit year to a full year -- confirm in
                    # pysus.utils.
                    "year": zfill_year(m.group(3)),
                    "month": None,
                }
            m = re.match(r"MPX_(\d{4})_OPENDATASUS\.CSV\.ZIP", name)
            if m:
                return {
                    "state": None,
                    "year": _parse_year(m.group(1)),
                    "month": None,
                }
            return empty
        except (IndexError, ValueError):
            return empty
[docs]
class SIM(Dataset):
    """Sistema de Informação sobre Mortalidade (SIM).

    Dataset definition for the mortality information system.
    """

    # Identifiers associated with this dataset.
    # NOTE(review): presumably dados.gov.br dataset ids -- confirm against
    # how ``Dataset`` consumes ``ids``.
    ids: list[str] = ["5f121f4d-47c6-428e-8ec6-e8ec56417172"]

    # Maps the long group name to its short group code.
    group_aliases: dict[str, str] = {"sim-1979-2019": "DO"}

    @property
    def name(self) -> str:
        """Short identifier of the dataset.

        Returns
        -------
        str
            Always ``"SIM"``.
        """
        return "SIM"

    @property
    def long_name(self) -> str:
        """Full name of the dataset.

        Returns
        -------
        str
            The dataset's name in Portuguese.
        """
        return "Sistema de Informação sobre Mortalidade"

    @property
    def description(self) -> str:
        """One-sentence summary of the dataset.

        Returns
        -------
        str
            A Portuguese sentence describing the SIM mortality system.
        """
        # The line breaks surrounding the sentence are part of the returned
        # text, so the literal is kept exactly as written.
        return """
O SIM coleta dados sobre óbitos no país para análise epidemiológica.
"""

    def formatter(self, filename: str) -> dict[str, Any]:
        """Extract the year from a SIM filename.

        Two case-sensitive name shapes are recognised:
        ``Mortalidade_Geral_<yyyy>_csv.zip`` found anywhere in the name, and
        names beginning with ``DO<yy>OPEN``.

        Parameters
        ----------
        filename : str
            The name of the file to parse. Surrounding whitespace is
            ignored.

        Returns
        -------
        dict[str, Any]
            A dictionary with keys ``state``, ``year``, and ``month``.
            Only ``year`` is ever populated. Skipped or unrecognised files
            return ``None`` for all keys.
        """
        blank: dict[str, Any] = {"state": None, "year": None, "month": None}
        try:
            stripped = filename.strip()
            if _skip(stripped):
                return blank
            four_digit = re.search(
                r"Mortalidade_Geral_(\d{4})_csv\.zip", stripped
            )
            if four_digit:
                year = _parse_year(four_digit.group(1))
            else:
                two_digit = re.match(r"DO(\d{2})OPEN", stripped)
                if not two_digit:
                    return blank
                # NOTE(review): zfill_year presumably expands the two-digit
                # year to a full year -- confirm in pysus.utils.
                year = zfill_year(two_digit.group(1))
            return {"state": None, "year": year, "month": None}
        except (IndexError, ValueError):
            return blank
[docs]
class SINASC(Dataset):
    """Sistema de Informações sobre Nascidos Vivos (SINASC).

    Dataset definition for the live births information system.
    """

    # Identifiers associated with this dataset.
    # NOTE(review): presumably dados.gov.br dataset ids -- confirm against
    # how ``Dataset`` consumes ``ids``.
    ids: list[str] = ["441cc6bd-684a-4afd-a88b-ba4734c9e83e"]

    # Maps the long group name to its short group code.
    group_aliases: dict[str, str] = {
        "sistema-de-informacao-sobre-nascidos-vivos-sinasc-1996-a-20201": "DN",
    }

    @property
    def name(self) -> str:
        """Short identifier of the dataset.

        Returns
        -------
        str
            Always ``"SINASC"``.
        """
        return "SINASC"

    @property
    def long_name(self) -> str:
        """Full name of the dataset.

        Returns
        -------
        str
            The dataset's name in Portuguese.
        """
        return "Sistema de Informações sobre Nascidos Vivos"

    @property
    def description(self) -> str:
        """Short summary of the dataset.

        Returns
        -------
        str
            A Portuguese description of the SINASC live birth system.
        """
        # The line breaks inside and around the sentence are part of the
        # returned text, so the literal is kept exactly as written.
        return """
O SINASC fornece subsídios para o diagnóstico de saúde e
planejamento de políticas de natalidade.
"""

    def formatter(self, filename: str) -> dict[str, Any]:
        """Extract the year (and, for ``DNBR`` files, the state) from a name.

        Two case-sensitive name shapes are searched for, in this order:
        ``SINASC_<yyyy>_csv.zip`` and ``DNBR<yyyy>_csv.zip``.

        Parameters
        ----------
        filename : str
            The name of the file to parse. Surrounding whitespace is
            ignored.

        Returns
        -------
        dict[str, Any]
            A dictionary with keys ``state``, ``year``, and ``month``.
            ``month`` is always ``None``; ``state`` is ``"BR"`` for ``DNBR``
            files and ``None`` otherwise. Skipped or unrecognised files
            return ``None`` for all keys.
        """
        blank: dict[str, Any] = {"state": None, "year": None, "month": None}
        # Each entry pairs a pattern with the state reported when it matches.
        shapes = (
            (r"SINASC_(\d{4})_csv\.zip", None),
            (r"DNBR(\d{4})_csv\.zip", "BR"),
        )
        try:
            stripped = filename.strip()
            if _skip(stripped):
                return blank
            for pattern, state in shapes:
                found = re.search(pattern, stripped)
                if found:
                    return {
                        "state": state,
                        "year": _parse_year(found.group(1)),
                        "month": None,
                    }
            return blank
        except (IndexError, ValueError):
            return blank
[docs]
class COVID19(Dataset):
    """Casos Confirmados de COVID-19.

    Dataset definition for the confirmed COVID-19 cases data.
    """

    # Identifiers associated with this dataset.
    # NOTE(review): presumably dados.gov.br dataset ids -- confirm against
    # how ``Dataset`` consumes ``ids``.
    ids: list[str] = [
        "1ba1801e-aec0-4dba-ae2a-7732f0a0c9f7",
    ]

    @property
    def name(self) -> str:
        """Return the short name.

        Returns
        -------
        str
            The abbreviated dataset name ``"COVID19"``.
        """
        return "COVID19"

    @property
    def long_name(self) -> str:
        """Return the human-readable name.

        Returns
        -------
        str
            The full Portuguese name of the dataset.
        """
        return "Casos Confirmados de COVID-19"

    @property
    def description(self) -> str:
        """Return a description of the dataset.

        Returns
        -------
        str
            A Portuguese description of the COVID-19 confirmed cases
            dataset.
        """
        return "Dados anonimizados de casos confirmados de COVID-19."

    def formatter(self, filename: str) -> dict[str, Any]:
        """Return the metadata for a COVID-19 filename.

        No state, year, or month is extracted from COVID-19 file names, so
        the result does not depend on ``filename``.

        Parameters
        ----------
        filename : str
            The name of the file. Accepted for interface compatibility
            with the other datasets' ``formatter`` methods.

        Returns
        -------
        dict[str, Any]
            A dictionary with keys ``state``, ``year``, and ``month``, all
            set to ``None``.
        """
        # Skipped files, ``.xlsx``/``.csv`` files and everything else all
        # map to the same empty metadata, so no branching is needed.
        return {"state": None, "year": None, "month": None}
# Dataset classes defined in this module, collected in a single list.
# NOTE(review): the list order (SIM before SINAN) differs from the order in
# which the classes are defined above; confirm whether consumers rely on it
# before reordering.
AVAILABLE_DATABASES: list[type[Dataset]] = [
CNES,
PNI,
SIA,
SIM,
SINAN,
SINASC,
COVID19,
]