Skip to content

Merge branch 'main' into config-repeating-fields #25

Merge branch 'main' into config-repeating-fields

Merge branch 'main' into config-repeating-fields #25

GitHub Actions / Test Results failed Oct 12, 2023 in 0s

1 fail in 7s

1 tests   0 ✔️  7s ⏱️
1 suites  0 💤
1 files    1 ❌

Results for commit f7ae632.

Annotations

Check warning on line 0 in tests.test_main.TestHAPIPipelines

See this annotation in the file changed.

@github-actions github-actions / Test Results

test_pipelines (tests.test_main.TestHAPIPipelines) failed

test-results.xml [took 5s]
Raw output
KeyError: None
self = <test_main.TestHAPIPipelines object at 0x7fed8c108bd0>
configuration = {'hdx_prod_site': {'url': 'https://data.humdata.org'}, 'hdx_demo_site': {'url': 'https://demo.data-humdata-org.ahconu.... 'org_acronym', 'org_type_name', 'sector'], 'output_hxl': ['#org+name', '#org+acronym', '#org+type+name', '#sector']}}}
folder = 'tests/fixtures'

    def test_pipelines(self, configuration, folder):
        with ErrorsOnExit() as errors_on_exit:
            with temp_dir(
                "TestHAPIPipelines",
                delete_on_success=True,
                delete_on_failure=False,
            ) as temp_folder:
                dbpath = join(temp_folder, "test_hapi.db")
                try:
                    remove(dbpath)
                except OSError:
                    pass
                logger.info(f"Creating database {dbpath}")
                with Database(database=dbpath, dialect="sqlite") as session:
                    today = parse_date("2023-10-11")
                    Read.create_readers(
                        temp_folder,
                        join(folder, "input"),
                        temp_folder,
                        False,
                        True,
                        today=today,
                    )
                    logger.info("Initialising pipelines")
                    pipelines = Pipelines(
                        configuration,
                        session,
                        today,
                        errors_on_exit=errors_on_exit,
                        use_live=False,
                    )
                    logger.info("Running pipelines")
>                   pipelines.run()

tests/test_main.py:86: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/hapi/pipelines/app/pipelines.py:107: in run
    self.runner.run()
../../../.local/share/hatch/env/virtual/hapi-pipelines/zEFzVURN/test.py3.11/lib/python3.11/site-packages/hdx/scraper/runner.py:655: in run
    self.run_scraper(name, force_run)
../../../.local/share/hatch/env/virtual/hapi-pipelines/zEFzVURN/test.py3.11/lib/python3.11/site-packages/hdx/scraper/runner.py:629: in run_scraper
    return self.run_one(name, force_run)
../../../.local/share/hatch/env/virtual/hapi-pipelines/zEFzVURN/test.py3.11/lib/python3.11/site-packages/hdx/scraper/runner.py:584: in run_one
    scraper.run()
../../../.local/share/hatch/env/virtual/hapi-pipelines/zEFzVURN/test.py3.11/lib/python3.11/site-packages/hdx/scraper/configurable/scraper.py:492: in run
    self.run_scraper(iterator)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <hdx.scraper.configurable.scraper.ConfigurableScraper object at 0x7fed8c1adc50>
iterator = <generator object Download._get_tabular_rows.<locals>.get_next at 0x7fed8a6d30a0>

    def run_scraper(self, iterator: Iterator[Dict]) -> None:
        """Run one configurable scraper given an iterator over the rows
    
        Args:
            iterator (Iterator[Dict]): Iterator over the rows
    
        Returns:
            None
        """
    
        valuedicts = {}
        for subset in self.subsets:
            for _ in subset["input"]:
                dict_of_lists_add(valuedicts, subset["filter"], {})
    
        def add_row(row):
            adm, should_process_subset = self.rowparser.parse(row)
            if not adm:
                return
            for i, subset in enumerate(self.subsets):
                if not should_process_subset[i]:
                    continue
                filter = subset["filter"]
                input_ignore_vals = subset.get("input_ignore_vals", [])
                input_transforms = subset.get("transform", {})
                list_cols = subset.get("list")
                sum_cols = subset.get("sum")
                process_cols = subset.get("process")
                input_append = subset.get("input_append", [])
                input_keep = subset.get("input_keep", [])
                for i, valcol in enumerate(subset["input"]):
                    valuedict = valuedicts[filter][i]
                    val = get_rowval(row, valcol)
                    input_transform = input_transforms.get(valcol)
                    if input_transform and val not in input_ignore_vals:
                        val = eval(input_transform.replace(valcol, "val"))
                    if sum_cols or process_cols:
                        dict_of_lists_add(valuedict, adm, val)
                    elif list_cols and valcol in list_cols:
                        dict_of_lists_add(valuedict, adm, val)
                    else:
                        curval = valuedict.get(adm)
                        if valcol in input_append:
                            if curval:
                                val = curval + val
                        elif valcol in input_keep:
                            if curval:
                                val = curval
                        valuedict[adm] = val
    
        for row in self.rowparser.filter_sort_rows(iterator):
            add_row(row)
    
        values = self.values[self.level_name]
        values_pos = 0
        for subset in self.subsets:
>           valdicts = valuedicts[subset["filter"]]
E           KeyError: None

../../../.local/share/hatch/env/virtual/hapi-pipelines/zEFzVURN/test.py3.11/lib/python3.11/site-packages/hdx/scraper/configurable/scraper.py:327: KeyError