[1]:
import pandas as pd
from pysus.preprocessing.geodata import add_data_to_municipality
from pysus.preprocessing.SIM import group_and_count, redistribute_missing, redistribute_cid_chapter
from pysus.preprocessing.decoders import translate_variables_SIM

from pysus.ftp.databases.sim import SIM

# Database handle used by the download loop further down
# (via sim.get_files(...) and sim.download(...)).
# NOTE(review): load() presumably fetches the remote file listing for the
# SIM database — confirm against the pysus documentation.
sim = SIM().load()

# State abbreviations that are downloaded one at a time.
# NOTE(review): these look like the states of Brazil's North region — confirm.
states = "AC AM AP PA RO RR TO".split()

# Columns kept from every decoded frame and used as grouping keys.
variables = ["CODMUNRES", "SEXO", "IDADE_ANOS", "CID10_CHAPTER"]

# Every grouping column except the trailing CID10_CHAPTER.
filter_variables = variables[:-1]

# Empty placeholder for the combined per-municipality result.
geo_df = pd.DataFrame()

# Age-class settings handed to translate_variables_SIM as `classify_args`;
# the resulting column labels show 5-year bins from 0 up to 90 plus an
# open-ended [90, inf) class.
ages_args = dict(start=0, end=90, freq=5)
[2]:
import warnings

# Silences every warning for the rest of the session. Kept as in the original
# to mute library noise, but note that it also hides deprecation warnings.
warnings.filterwarnings('ignore')

# Collect one frame per state and concatenate once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copied the accumulated frame on every pass.
# Building the list here also makes the cell idempotent: re-running it
# rebuilds geo_df from scratch instead of duplicating every state's rows.
state_frames = []
for state in states:
    # Fetch this state's 2018 "CID10" file(s) and load them into a DataFrame.
    df = sim.download(sim.get_files("CID10",state,2018)).to_dataframe()
    # Decode the raw columns; age classes use the bins configured in ages_args
    # and CID-10 codes are classified into chapters.
    df = translate_variables_SIM(df,age_classes=True,classify_args=ages_args,classify_cid10_chapters=True)
    df = df[variables]
    # NOTE(review): presumably one count per combination of `variables` — confirm.
    counts = group_and_count(df,variables)
    # NOTE(review): presumably reallocates counts whose values are missing or
    # whose CID chapter is undefined across the known categories — confirm.
    counts = redistribute_missing(counts,filter_variables)
    counts = redistribute_cid_chapter(counts,filter_variables)
    # One row per municipality; the resulting column labels join
    # SEXO, IDADE_ANOS and CID10_CHAPTER with "-" and use 'NA' for missing.
    state_frames.append(add_data_to_municipality(counts,title_cols=['SEXO','IDADE_ANOS','CID10_CHAPTER'], nan_string='NA'))

# pd.concat raises on an empty list, so fall back to an empty frame.
geo_df = pd.concat(state_frames) if state_frames else pd.DataFrame()
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 330k/330k [00:00<00:00, 167MB/s]
2023-09-19 13:53:06.107 | DEBUG    | pysus.online_data.SIM:get_municipios:185 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:06.128 | DEBUG    | pysus.online_data.SIM:get_municipios:189 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:06.129 | INFO     | pysus.online_data.SIM:get_municipios:199 - Local parquet file found at /home/bida/pysus/SIM_CADMUN_.parquet
2023-09-19 13:53:06.371 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:47 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:06.392 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:51 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:06.393 | INFO     | pysus.online_data.SIM:get_CID10_chapters_table:61 - Local parquet file found at /home/bida/pysus/SIM_CIDCAP10_.parquet
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1.42M/1.42M [00:00<00:00, 1.07GB/s]
2023-09-19 13:53:08.409 | DEBUG    | pysus.online_data.SIM:get_municipios:185 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:08.431 | DEBUG    | pysus.online_data.SIM:get_municipios:189 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:08.432 | INFO     | pysus.online_data.SIM:get_municipios:199 - Local parquet file found at /home/bida/pysus/SIM_CADMUN_.parquet
2023-09-19 13:53:08.879 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:47 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:08.899 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:51 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:08.899 | INFO     | pysus.online_data.SIM:get_CID10_chapters_table:61 - Local parquet file found at /home/bida/pysus/SIM_CIDCAP10_.parquet
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 264k/264k [00:00<00:00, 173MB/s]
2023-09-19 13:53:11.067 | DEBUG    | pysus.online_data.SIM:get_municipios:185 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:11.088 | DEBUG    | pysus.online_data.SIM:get_municipios:189 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:11.089 | INFO     | pysus.online_data.SIM:get_municipios:199 - Local parquet file found at /home/bida/pysus/SIM_CADMUN_.parquet
2023-09-19 13:53:11.311 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:47 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:11.332 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:51 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:11.333 | INFO     | pysus.online_data.SIM:get_CID10_chapters_table:61 - Local parquet file found at /home/bida/pysus/SIM_CIDCAP10_.parquet
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 3.22M/3.22M [00:00<00:00, 4.17GB/s]
2023-09-19 13:53:13.178 | DEBUG    | pysus.online_data.SIM:get_municipios:185 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:13.199 | DEBUG    | pysus.online_data.SIM:get_municipios:189 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:13.200 | INFO     | pysus.online_data.SIM:get_municipios:199 - Local parquet file found at /home/bida/pysus/SIM_CADMUN_.parquet
2023-09-19 13:53:14.100 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:47 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:14.122 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:51 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:14.123 | INFO     | pysus.online_data.SIM:get_CID10_chapters_table:61 - Local parquet file found at /home/bida/pysus/SIM_CIDCAP10_.parquet
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 665k/665k [00:00<00:00, 509MB/s]
2023-09-19 13:53:19.139 | DEBUG    | pysus.online_data.SIM:get_municipios:185 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:19.160 | DEBUG    | pysus.online_data.SIM:get_municipios:189 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:19.161 | INFO     | pysus.online_data.SIM:get_municipios:199 - Local parquet file found at /home/bida/pysus/SIM_CADMUN_.parquet
2023-09-19 13:53:19.459 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:47 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:19.481 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:51 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:19.482 | INFO     | pysus.online_data.SIM:get_CID10_chapters_table:61 - Local parquet file found at /home/bida/pysus/SIM_CIDCAP10_.parquet
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 230k/230k [00:00<00:00, 146MB/s]
2023-09-19 13:53:21.358 | DEBUG    | pysus.online_data.SIM:get_municipios:185 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:21.379 | DEBUG    | pysus.online_data.SIM:get_municipios:189 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:21.381 | INFO     | pysus.online_data.SIM:get_municipios:199 - Local parquet file found at /home/bida/pysus/SIM_CADMUN_.parquet
2023-09-19 13:53:21.597 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:47 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:21.618 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:51 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:21.619 | INFO     | pysus.online_data.SIM:get_CID10_chapters_table:61 - Local parquet file found at /home/bida/pysus/SIM_CIDCAP10_.parquet
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 659k/659k [00:00<00:00, 554MB/s]
2023-09-19 13:53:22.812 | DEBUG    | pysus.online_data.SIM:get_municipios:185 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:22.833 | DEBUG    | pysus.online_data.SIM:get_municipios:189 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:22.834 | INFO     | pysus.online_data.SIM:get_municipios:199 - Local parquet file found at /home/bida/pysus/SIM_CADMUN_.parquet
2023-09-19 13:53:23.138 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:47 - Stablishing connection with ftp.datasus.gov.br.
220 Microsoft FTP Service
2023-09-19 13:53:23.159 | DEBUG    | pysus.online_data.SIM:get_CID10_chapters_table:51 - Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS
2023-09-19 13:53:23.161 | INFO     | pysus.online_data.SIM:get_CID10_chapters_table:61 - Local parquet file found at /home/bida/pysus/SIM_CIDCAP10_.parquet
[3]:
# Inspect the column labels of the combined frame: municipality metadata and
# geometry first, then one column per sex / age class / CID-10 chapter
# combination (labels such as 'Feminino-[0.0, 5.0)-1').
geo_df.columns
[3]:
Index(['code_muni', 'name_muni', 'code_state', 'abbrev_state', 'name_state',
       'code_region', 'name_region', 'geometry', 'Feminino-[0.0, 5.0)-1',
       'Feminino-[0.0, 5.0)-2',
       ...
       'NA-[50.0, 55.0)-8', 'NA-[55.0, 60.0)-8', 'NA-[60.0, 65.0)-8',
       'NA-[65.0, 70.0)-8', 'NA-[70.0, 75.0)-8', 'NA-[75.0, 80.0)-8',
       'NA-[80.0, 85.0)-8', 'NA-[85.0, 90.0)-8', 'NA-[90.0, inf)-8',
       'NA-NA-8'],
      dtype='object', length=1088)