diff --git a/oc_ds_converter/crossref/crossref_processing.py b/oc_ds_converter/crossref/crossref_processing.py
index cff5d13..8700708 100644
--- a/oc_ds_converter/crossref/crossref_processing.py
+++ b/oc_ds_converter/crossref/crossref_processing.py
@@ -179,11 +179,6 @@ def get_id_manager(self, schema_or_id, id_man_dict):
         id_man = id_man_dict.get(schema)
         return id_man
 
-    def normalise_any_id(self, id_with_prefix):
-        id_man = self.doi_m
-        id_no_pref = ":".join(id_with_prefix.split(":")[1:])
-        norm_id_w_pref = id_man.normalise(id_no_pref, include_prefix=True)
-        return norm_id_w_pref
 
     def dict_to_cache(self, dict_to_be_saved, path):
         path = Path(path)
diff --git a/oc_ds_converter/jalc/jalc_processing.py b/oc_ds_converter/jalc/jalc_processing.py
index 7d2e48a..a6c0d62 100644
--- a/oc_ds_converter/jalc/jalc_processing.py
+++ b/oc_ds_converter/jalc/jalc_processing.py
@@ -47,6 +47,8 @@ class JalcProcessing(RaProcessor):
     def __init__(self, orcid_index: str = None, doi_csv: str = None, publishers_filepath_jalc: str = None,
                  testing: bool = True, storage_manager: Optional[StorageManager] = None, citing=True):
+        """This class produces the CSV tables used as input for the META process,
+        which ingests data from the sources."""
        super(JalcProcessing, self).__init__(orcid_index, doi_csv)
        self.citing = citing
        if storage_manager is None:
@@ -61,12 +63,13 @@ def __init__(self, orcid_index: str = None, doi_csv: str = None, publishers_file
        self.jid_m = JIDManager(storage_manager=self.storage_manager)
        self.venue_id_man_dict = {"issn":self.issn_m, "jid":self.jid_m}
 
-        # Temporary storage managers : all data must be stored in tmp storage manager and passed all together to the
-        # main storage_manager only once the full file is processed. Checks must be done both on tmp and in
-        # storage_manager, so that in case the process breaks while processing a file which does not complete (so
-        # without writing the final file) all the data concerning the ids are not stored. Otherwise, the ids saved in
-        # a storage_manager db would be considered to have been processed and thus would be ignored by the process
-        # and lost.
+
+        '''Temporary storage managers: all data must be stored in the temporary storage manager and passed all
+        together to the main storage_manager only once a full file has been processed. Checks must be done both
+        on the temporary manager and on the storage_manager, so that if the process breaks on a file that never
+        completes (i.e. before the final file is written), none of the data concerning its ids is stored.
+        Otherwise, the ids saved in a storage_manager db would be considered already processed, and would thus
+        be ignored by the process and lost.'''
 
        self.tmp_doi_m = DOIManager(storage_manager=self.temporary_manager)
        self.tmp_jid_m = JIDManager(storage_manager=self.temporary_manager)
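To make the two-tier pattern above concrete, here is a minimal sketch of the flush step the comment describes. The wrapper function is illustrative only; the three manager methods are the real ones used by `memory_to_storage()` later in this patch.

```python
# Illustrative only: validity flags accumulate in the temporary manager and
# reach the persistent database in a single bulk write once a file completes.
def flush_file_results(temporary_manager, storage_manager):
    kv_in_memory = temporary_manager.get_validity_list_of_tuples()  # [(id, validity), ...]
    storage_manager.set_multi_value(kv_in_memory)  # one bulk write to the main db
    temporary_manager.delete_storage()             # start clean for the next file
```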
@@ -75,7 +78,7 @@ def __init__(self, orcid_index: str = None, doi_csv: str = None, publishers_file
 
        if testing:
-            self.BR_redis= fakeredis.FakeStrictRedis()
+            self.BR_redis = fakeredis.FakeStrictRedis()
        else:
            self.BR_redis = RedisDataSource("DB-META-BR")
 
@@ -83,7 +86,6 @@ def __init__(self, orcid_index: str = None, doi_csv: str = None, publishers_file
        self._redis_values_br = []
 
        if not publishers_filepath_jalc:
-            #we have removed the creation of the file if it is not passed as input
            self.publishers_filepath = None
        else:
            self.publishers_filepath = publishers_filepath_jalc
@@ -106,22 +108,23 @@ def update_redis_values(self, br):
        self._redis_values_br = br
 
    def validated_as(self, id):
-        # Check if the validity was already retrieved and thus
-        # a) if it is now saved either in the in-memory database, which only concerns data validated
-        # during the current file processing;
-        # b) or if it is now saved in the storage_manager database, which only concerns data validated
-        # during the previous files processing.
-        # In memory db is checked first because the dimension is smaller and the check is faster and
-        # Because we assume that it is more likely to find the same ids in close positions, e.g.: same
-        # citing id in several citations with different cited ids.
+        """Check whether the validity of the id has already been retrieved, i.e. whether it is saved
+        a) in the in-memory database, which only concerns data validated while processing the current file, or
+        b) in the storage_manager database, which only concerns data validated while processing previous files.
+        The in-memory db is checked first because it is smaller, so the lookup is faster, and because the same
+        ids are likely to occur in close positions, e.g. the same citing id in several citations with different
+        cited ids. In conclusion, if the id is found by this method, it has already been encountered in the
+        dump being processed."""
        validity_value = self.tmp_doi_m.validated_as_id(id)
        if validity_value is None:
            validity_value = self.doi_m.validated_as_id(id)
        return validity_value
-        # if the identifier is found here, it means it has already been encountered within this same dump
+
    def get_id_manager(self, schema_or_id, id_man_dict):
-        """Given as input the string of a schema (e.g.:'pmid') and a dictionary mapping strings of
+        """Given as input the string of a schema (e.g. 'jid') and a dictionary mapping strings of
        the schemas to their id managers, the method returns the correct id manager.
        Note that each instance of the Preprocessing class needs its own instances of the id managers,
        in order to avoid conflicts while validating data."""
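A short usage sketch of `get_id_manager()` as documented above, assuming a default-constructed `JalcProcessing`; the ISSN value is taken from an example that appears elsewhere in this patch.

```python
from oc_ds_converter.jalc.jalc_processing import JalcProcessing

jalc = JalcProcessing()  # testing defaults: fakeredis-backed
# Both a bare schema string and a prefixed id resolve to the same manager
# from the schema -> manager mapping built in __init__.
by_schema = jalc.get_id_manager("issn", jalc.venue_id_man_dict)
by_id = jalc.get_id_manager("issn:1880-3016", jalc.venue_id_man_dict)
assert by_schema is by_id  # same ISSN id manager instance
```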
@@ -133,12 +136,6 @@ def get_id_manager(self, schema_or_id, id_man_dict):
        id_man = id_man_dict.get(schema)
        return id_man
 
-    def normalise_any_id(self, id_with_prefix):
-        id_man = self.doi_m
-        id_no_pref = ":".join(id_with_prefix.split(":")[1:])
-        norm_id_w_pref = id_man.normalise(id_no_pref, include_prefix=True)
-        return norm_id_w_pref
-
    def dict_to_cache(self, dict_to_be_saved, path):
        path = Path(path)
        parent_dir_path = path.parent.absolute()
@@ -148,10 +145,11 @@ def dict_to_cache(self, dict_to_be_saved, path):
            json.dump(dict_to_be_saved, fd, ensure_ascii=False, indent=4)
 
    def csv_creator(self, item:dict) -> dict:
+        """This method creates the CSV table row for the META process, given an entity dictionary."""
        doi = item["doi"]
        if (doi and self.doi_set and doi in self.doi_set) or (doi and not self.doi_set):
            norm_id = self.doi_m.normalise(doi, include_prefix=True)
-            title = self.get_ja(item['title_list'])[0]['title'] if 'title_list' in item else ''  # Future Water Availability in the Asian Monsoon Region: A Case Study in Indonesia (not available in Japanese)
+            title = self.get_ja(item['title_list'])[0]['title'] if 'title_list' in item else ''
            authors_list = self.get_authors(item)
            authors_string_list, editors_string_list = self.get_agents_strings_list(doi, authors_list)
            issue = item['issue'] if 'issue' in item else ''
@@ -175,7 +173,10 @@ def csv_creator(self, item:dict) -> dict:
 
    @classmethod
-    def get_ja(cls, field: list) -> list:  # [{'publisher_name': '筑波大学農林技術センター', 'lang': 'ja'}]
+    def get_ja(cls, field: list) -> list:
+        """This method takes a list of dictionaries, each carrying a "lang" key. If a metadata field is
+        provided both in the original language and in English translation, the method returns the Japanese
+        version; otherwise, the English translation is returned."""
        if all('lang' in item for item in field):
            ja = [item for item in field if item['lang'] == 'ja']
            ja = list(filter(lambda x: x['type'] != 'before' if 'type' in x else x, ja))
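A minimal example of `get_ja()` as described in the new docstring. The Japanese entry comes from the example comment this hunk removes; the English entry and its name are invented for contrast.

```python
from oc_ds_converter.jalc.jalc_processing import JalcProcessing

field = [
    {"publisher_name": "筑波大学農林技術センター", "lang": "ja"},
    {"publisher_name": "Tsukuba Agricultural Research Center", "lang": "en"},  # hypothetical
]
# Per the docstring, the Japanese version wins when both languages are present:
assert JalcProcessing.get_ja(field) == [{"publisher_name": "筑波大学農林技術センター", "lang": "ja"}]
```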
""" if self.citing: publisher = self.get_ja(item['publisher_list'])[0]['publisher_name'] if 'publisher_list' in item else '' elif not self.citing and self.publishers_mapping: @@ -243,6 +244,7 @@ def get_authors(self, data: dict) -> list: def get_venue(self, data: dict) -> str: venue_name = '' + journal_ids = [] if 'journal_title_name_list' in data: candidate_venues = self.get_ja(data['journal_title_name_list']) if candidate_venues: @@ -252,38 +254,17 @@ def get_venue(self, data: dict) -> str: elif candidate_venues: venue_name = candidate_venues[0]['journal_title_name'] if 'journal_id_list' in data: - # validation of venue ids - journal_ids = self.to_validated_venue_id_list(data['journal_id_list']) - else: - journal_ids = list() + for v in data['journal_id_list']: + if isinstance(v, dict): + if v.get("journal_id"): + if v.get("type").lower().strip() in ["issn", "jid"]: + schema = v.get("type").lower().strip() + venue_id = v.get("journal_id") + tmp_id_man = self.get_id_manager(schema, self.venue_tmp_id_man_dict) + if tmp_id_man: + norm_id = tmp_id_man.normalise(venue_id, include_prefix=True) + journal_ids.append(norm_id) return f"{venue_name} [{' '.join(journal_ids)}]" if journal_ids else venue_name - # 'Journal of Developments in Sustainable Agriculture [issn:1880-3016 issn:1880-3024 jid:jdsa]' - - def to_validated_venue_id_list(self, journal_id_list: list): - valid_identifiers = list() - for v in journal_id_list: - if isinstance(v, dict): - if v.get("journal_id"): - if v.get("type").lower().strip() in ["issn", "jid"]: - schema = v.get("type").lower().strip() - id = v.get("journal_id") - tmp_id_man = self.get_id_manager(schema, self.venue_tmp_id_man_dict) - if tmp_id_man: - if tmp_id_man == self.tmp_jid_m: - norm_id = tmp_id_man.normalise(id, include_prefix=True) - # if self.BR_redis.get(norm_id): - if norm_id and norm_id in self._redis_values_br: - tmp_id_man.storage_manager.set_value(norm_id, True) # In questo modo l'id presente in redis viene inserito anche nello storage e risulta già - # preso in considerazione negli step successivi - valid_identifiers.append(norm_id) - elif norm_id and tmp_id_man.is_valid(norm_id): - valid_identifiers.append(norm_id) - else: - norm_id = tmp_id_man.normalise(id, include_prefix=True) - if tmp_id_man.is_valid(norm_id): - valid_identifiers.append(norm_id) - return sorted(valid_identifiers) - @classmethod @@ -319,40 +300,41 @@ def get_pub_date(cls, data) -> str: pub_date_list.append(day) return '-'.join(pub_date_list) - #id_dict = {"identifier": "doi:10.11221/jima.51.86", "is_valid": None} + def to_validated_id_list(self, norm_id): """this method takes in input a normalized DOI identifier and the information of validity and returns a list valid and existent ids with prefixes. For each id, a first validation try is made by checking its presence in META db. 
@@ -319,40 +300,41 @@ def get_pub_date(cls, data) -> str:
                pub_date_list.append(day)
        return '-'.join(pub_date_list)
 
-    #id_dict = {"identifier": "doi:10.11221/jima.51.86", "is_valid": None}
+
    def to_validated_id_list(self, norm_id):
        """This method takes in input a normalised DOI and returns a list of valid, existing ids with
        prefixes. For each id, a first validation attempt is made by checking its presence in the META db.
        If the id is not in the META db yet, a second attempt is made by using the specific id-schema API."""
-        #if self.BR_redis.get(norm_id):
+
        valid_id_list = []
        if norm_id in self._redis_values_br:
-            self.tmp_doi_m.storage_manager.set_value(norm_id, True)  # this way, an id found in redis is also added
-            # to the storage, so it is already taken into account in the following steps
+            self.tmp_doi_m.storage_manager.set_value(norm_id, True)
            valid_id_list.append(norm_id)
        # if the id is not in redis db, validate it before appending
-        elif self.tmp_doi_m.is_valid(norm_id):  # this way, an id found in redis is also added to the storage,
-            # so it is already taken into account in the following steps
+        elif self.tmp_doi_m.is_valid(norm_id):
            valid_id_list.append(norm_id)
        return valid_id_list
 
    def memory_to_storage(self):
        kv_in_memory = self.temporary_manager.get_validity_list_of_tuples()
-        #if kv_in_memory:
        self.storage_manager.set_multi_value(kv_in_memory)
        self.temporary_manager.delete_storage()
 
    def extract_all_ids(self, citation, is_first_iteration: bool):
-        if is_first_iteration:
+        """Given an entity dictionary, this method extracts all the DOIs of the cited entities when
+        "is_first_iteration" is False. When it is True, nothing is extracted here any more: the citing DOI
+        is handled directly by the processing script (the old behaviour is kept below, commented out)."""
+        '''if is_first_iteration:
            list_id_citing = list()
            d1_br = citation["data"]["doi"]
            norm_id = self.doi_m.normalise(d1_br, include_prefix=True)
            if norm_id:
                list_id_citing.append(norm_id)
-            return list_id_citing
+            # for citing entities the validation is not necessary, so we add the normalised doi as valid to the temporary storage manager
+            #self.tmp_doi_m.storage_manager.set_value(norm_id, True)
+            return list_id_citing'''
 
-        # this collects all the ids cited by a given citing entity
-        else:
+        if not is_first_iteration:
            all_br = list()
            d2_br = [x["doi"] for x in citation["data"]["citation_list"] if x.get("doi")]
            for d in d2_br:
@@ -363,7 +345,6 @@ def get_reids_validity_list(self, id_list):
        valid_br_ids = []
-        # DO NOT UPDATED (REDIS RETRIEVAL METHOD HERE)
        validity_list_br = self.BR_redis.mget(id_list)
        for i, e in enumerate(id_list):
            if validity_list_br[i]:
diff --git a/oc_ds_converter/run/crossref_process.py b/oc_ds_converter/run/crossref_process.py
index 0b4ab01..4a808b5 100644
--- a/oc_ds_converter/run/crossref_process.py
+++ b/oc_ds_converter/run/crossref_process.py
@@ -217,7 +217,7 @@ def get_all_redis_ids_and_save_updates(sli_da, is_first_iteration_par:bool):
                    has_doi_references = True if [x for x in entity["reference"] if x.get("DOI")] else False
                    if has_doi_references:
                        if is_first_iteration_par:
-                           ent_all_br, ent_all_ra = crossref_csv.extract_all_ids(entity, True)
+                            ent_all_br, ent_all_ra = crossref_csv.extract_all_ids(entity, True)
                        else:
                            ent_all_br, ent_all_ra = crossref_csv.extract_all_ids(entity, False)
                        all_br.extend(ent_all_br)
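The `get_reids_validity_list()` hunk above relies on a single Redis `mget` for the bulk validity lookup; a minimal sketch of that pattern follows (the helper name is hypothetical, `mget` is the real redis-py call):

```python
# Illustrative only: one mget round-trip instead of one GET per id; ids with a
# non-empty value are already known to META and are kept as valid.
def known_in_meta(redis_client, id_list):
    hits = redis_client.mget(id_list)  # one network call for the whole batch
    return [identifier for identifier, hit in zip(id_list, hits) if hit]
```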
"_decompr_zip_dir": + # if el.startswith(base_name_el_to_skip) and el.endswith("decompr_zip_dir"): all_input_zip = [os.path.join(jalc_json_dir, el, file) for file in os.listdir(os.path.join(jalc_json_dir, el)) if not file.endswith(".json") and not file.startswith("._")] @@ -90,6 +90,7 @@ def preprocess(jalc_json_dir:str, publishers_filepath:str, orcid_doi_filepath:st for zip_lev0 in os.listdir(jalc_json_dir): all_input_zip, targz_fd = get_all_files_by_type(os.path.join(jalc_json_dir, zip_lev0), req_type, cache) + # in test files the decompressed directory, at the end of each execution of the process, is always deleted else: all_input_zip = os.listdir(jalc_json_dir) for zip in all_input_zip: @@ -112,8 +113,6 @@ def preprocess(jalc_json_dir:str, publishers_filepath:str, orcid_doi_filepath:st with ProcessPool(max_workers=max_workers, max_tasks=1) as executor: for zip_file in all_input_zip: - print(zip_file) - future: ProcessFuture = executor.schedule( function=get_citations_and_metadata, args=( @@ -122,7 +121,6 @@ def preprocess(jalc_json_dir:str, publishers_filepath:str, orcid_doi_filepath:st with ProcessPool(max_workers=max_workers, max_tasks=1) as executor: for zip_file in all_input_zip: - print(zip_file) future: ProcessFuture = executor.schedule( function=get_citations_and_metadata, args=( @@ -208,23 +206,22 @@ def get_citations_and_metadata(zip_file: str, preprocessed_citations_dir: str, c pathoo(filepath) pathoo(filepath_citations) - def get_all_redis_ids_and_save_updates(sli_da, is_first_iteration_par:bool): + def get_all_redis_ids_and_save_updates(sli_da, is_first_iteration_par: bool): all_br = [] for entity in sli_da: if entity: - d = entity.get("data") + d = entity["data"] # filtering out entities without citations - if "citation_list" in d: + if d.get("citation_list"): cit_list = d["citation_list"] cit_list_doi = [x for x in cit_list if x.get("doi")] # filtering out entities with citations without dois if cit_list_doi: - if is_first_iteration_par: - ent_all_br = jalc_csv.extract_all_ids(entity, True) - else: + '''if is_first_iteration_par: + ent_all_br = jalc_csv.extract_all_ids(entity, True)''' + if not is_first_iteration_par: ent_all_br = jalc_csv.extract_all_ids(entity, False) - all_br.extend(ent_all_br) - + all_br.extend(ent_all_br) redis_validity_values_br = jalc_csv.get_reids_validity_list(all_br) jalc_csv.update_redis_values(redis_validity_values_br) @@ -308,20 +305,24 @@ def task_done(is_first_iteration_par: bool) -> None: print(e) if is_first_iteration: - get_all_redis_ids_and_save_updates(source_dict, is_first_iteration_par=True) # prima l'ultimo file va processato for entity in tqdm(source_dict): - #pbar.update() if entity: d = entity.get("data") #per i citanti la validazione non serve, se è normalizzabile va direttamente alla crezione tabelle Meta norm_source_id = jalc_csv.doi_m.normalise(d['doi'], include_prefix=True) - if norm_source_id: - source_tab_data = jalc_csv.csv_creator(d) - if source_tab_data: - processed_source_id = source_tab_data["id"] - if processed_source_id: - data_citing.append(source_tab_data) + + if not jalc_csv.doi_m.storage_manager.get_value(norm_source_id): + # add the id as valid to the temporary storage manager (whose values will be transferred to the redis storage manager at the + # time of the csv files creation process) and create a meta csv row for the entity in this case only + jalc_csv.tmp_doi_m.storage_manager.set_value(norm_source_id, True) + + if norm_source_id: + source_tab_data = jalc_csv.csv_creator(d) + if source_tab_data: + 
processed_source_id = source_tab_data["id"] + if processed_source_id: + data_citing.append(source_tab_data) save_files(data_citing, index_citations_to_csv, True) #pbar.close() diff --git a/test/crossref_process_test.py b/test/crossref_process_test.py new file mode 100644 index 0000000..ccf0e83 --- /dev/null +++ b/test/crossref_process_test.py @@ -0,0 +1,108 @@ +import os.path +import shutil +import unittest +from os.path import join +from oc_ds_converter.run.crossref_process import * + +BASE = os.path.join('test', 'crossref_processing') +TARGZ_INPUT_FOLDER = os.path.join(BASE, 'tar_gz_test') +TARGZ_INPUT = os.path.join(TARGZ_INPUT_FOLDER, '40228.tar.gz') +OUTPUT = os.path.join(BASE, 'output_dir') +PUBLISHERS_MAPPING = os.path.join(BASE, 'publishers.csv') +WANTED_DOIS_FOLDER = os.path.join(BASE, 'wanted_dois') +IOD = os.path.join(BASE, 'iod') +CACHE = os.path.join(BASE, 'cache.json') +DB = os.path.join(BASE, 'anydb.db') +TARGZ_CITED_INPUT_FOLDER = os.path.join(BASE, 'tar_gz_cited_test') +TARGZ_CITED_INPUT = os.path.join(TARGZ_CITED_INPUT_FOLDER, '3.tar.gz') + +class CrossrefProcessTest(unittest.TestCase): + def test_preprocess_base_decompress_and_read_without_cited(self): + """CASE 1: compressed input without cited entities""" + if os.path.exists(OUTPUT): + shutil.rmtree(OUTPUT) + + citations_output_path = OUTPUT + "_citations" + if os.path.exists(citations_output_path): + shutil.rmtree(citations_output_path) + + preprocess(TARGZ_INPUT, PUBLISHERS_MAPPING, IOD, OUTPUT, redis_storage_manager=False, storage_path=DB, cache = CACHE) + citations_in_output = 0 + encountered_ids = set() + unique_entities = 0 + + for file in os.listdir(citations_output_path): + with open(os.path.join(citations_output_path, file), 'r', encoding='utf-8') as f: + cits_rows = list(csv.DictReader(f)) + citations_in_output += len(cits_rows) + for x in cits_rows: + citing_ids = x["citing"].split(" ") + citied_ids = x["cited"].split(" ") + if all(id not in encountered_ids for id in citing_ids): + unique_entities += 1 + encountered_ids.update(citing_ids) + if all(id not in encountered_ids for id in citied_ids): + unique_entities += 1 + encountered_ids.update(citied_ids) + expected_entities_in_output = 0 + expected_citations_in_output=0 + self.assertEqual(expected_entities_in_output, unique_entities) + self.assertEqual(expected_citations_in_output, citations_in_output) + + shutil.rmtree(OUTPUT) + shutil.rmtree(citations_output_path) + + def test_preprocess_base_and_decompress_with_cited(self): + """CASE2: compressed input with cited entities""" + if os.path.exists(OUTPUT): + shutil.rmtree(OUTPUT) + + citations_output_path = OUTPUT + "_citations" + if os.path.exists(citations_output_path): + shutil.rmtree(citations_output_path) + + preprocess(crossref_json_dir=TARGZ_CITED_INPUT, publishers_filepath=PUBLISHERS_MAPPING, orcid_doi_filepath=IOD, csv_dir=OUTPUT, redis_storage_manager=False, storage_path=DB, cache = CACHE) + citations_in_output = 0 + encountered_ids = set() + unique_entities = 0 + + for file in os.listdir(citations_output_path): + with open(os.path.join(citations_output_path, file), 'r', encoding='utf-8') as f: + cits_rows = list(csv.DictReader(f)) + citations_in_output += len(cits_rows) + for x in cits_rows: + citing_ids = x["citing"].split(" ") + citied_ids = x["cited"].split(" ") + if all(id not in encountered_ids for id in citing_ids): + unique_entities += 1 + encountered_ids.update(citing_ids) + if all(id not in encountered_ids for id in citied_ids): + unique_entities += 1 + encountered_ids.update(citied_ids) 
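+            # The 3.tar.gz fixture is expected to yield 17 distinct entities across 16 citation rows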
+ expected_entities_in_output = 17 + expected_citations_in_output = 16 + self.assertEqual(expected_entities_in_output, unique_entities) + self.assertEqual(expected_citations_in_output, citations_in_output) + + '''citations_files_n = len(list(os.listdir(citations_output_path))) + + #shutil.rmtree(citations_output_path) + + meta_files_n = len(list(os.listdir(OUTPUT))) + + # Make sure that a meta table row was created for each entity + entities_in_meta_output = 0 + for file in os.listdir(OUTPUT): + with open(os.path.join(OUTPUT, file), 'r', encoding='utf-8') as f: + entities_in_meta_output += len(list(csv.DictReader(f))) + + self.assertEqual(expected_entities_in_output, entities_in_meta_output) + self.assertEqual(unique_entities, entities_in_meta_output) + + + # make sure that for each of the input files was created a citation file and two meta input file + self.assertTrue(meta_files_n == 2) + self.assertTrue(citations_files_n == 1) + + #shutil.rmtree(OUTPUT)''' + '''os.remove(DB)''' \ No newline at end of file diff --git a/test/crossref_processing/0.json b/test/crossref_processing/0.json new file mode 100644 index 0000000..b964df0 --- /dev/null +++ b/test/crossref_processing/0.json @@ -0,0 +1 @@ +{"items":[{"indexed":{"date-parts":[[2020,4,15]],"date-time":"2020-04-15T06:07:59Z","timestamp":1586930879898},"reference-count":42,"publisher":"Cambridge University Press (CUP)","issue":"1","license":[{"URL":"https://www.cambridge.org/core/terms","start":{"date-parts":[[2013,4,8]],"date-time":"2013-04-08T00:00:00Z","timestamp":1365379200000},"delay-in-days":769,"content-version":"unspecified"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Disaster med. public health prep."],"published-print":{"date-parts":[[2011,3]]},"abstract":"ABSTRACTEffective emergency response among hospitals and other health care providers stems from multiple factors depending on the nature of the emergency. While local emergencies can test hospital acute care facilities, prolonged national emergencies, such as the 2009 H1N1 outbreak, raise significant challenges. These events involve sustained surges of patients over longer periods and spanning entire regions. They require significant and sustained coordination of personnel, services, and supplies among hospitals and other providers to ensure adequate patient care across regions. Some hospitals, however, may lack structural principles to help coordinate care and guide critical allocation decisions. This article discusses a model Memorandum of Understanding (MOU) that sets forth essential principles on how to allocate scarce resources among providers across regions. The model seeks to align regional hospitals through advance agreements on procedures of mutual aid that reflect modern principles of emergency preparedness and changing legal norms in declared emergencies.(Disaster Med Public Health Preparedness. 
2011;5:54-61)","DOI":"10.1001/10-v4n2-hsf10003","type":"journal-article","created":{"date-parts":[[2010,7,20]],"date-time":"2010-07-20T04:38:59Z","timestamp":1279600739000},"page":"54-61","source":"Crossref","is-referenced-by-count":7,"title":["Facilitating Hospital Emergency Preparedness: Introduction of a Model Memorandum of Understanding"],"prefix":"10.1017","volume":"5","author":[{"suffix":"Jr","given":"James G.","family":"Hodge","affiliation":[]},{"given":"Evan D.","family":"Anderson","affiliation":[]},{"given":"Thomas D.","family":"Kirsch","affiliation":[]},{"given":"Gabor D.","family":"Kelen","affiliation":[]}],"member":"56","published-online":{"date-parts":[[2013,4,8]]},"reference":[{"key":"S1935789300003219_ref042","unstructured":"PACER Model MOU \u00a7 3.2."},{"key":"S1935789300003219_ref038","unstructured":"PACER Model MOU \u00a7 2.6."},{"key":"S1935789300003219_ref037","unstructured":"PACER Model MOU \u00a7 2.5."},{"key":"S1935789300003219_ref036","unstructured":"PACER Model MOU \u00a7 2.9(b)."},{"key":"S1935789300003219_ref034","unstructured":"PACER Model MOU \u00a7 1.8."},{"key":"S1935789300003219_ref033","unstructured":"42 USC \u00a7 1395dd (2003)."},{"key":"S1935789300003219_ref032","unstructured":"Valhal Corporation v Sullivan Associates, Inc, 44 F3d 195, 202 (3d Cir. 1995)."},{"key":"S1935789300003219_ref031","author":"Franklin","article-title":"Tort Law and Alternatives.","journal-title":"New York: Foundation Press; 2001"},{"key":"S1935789300003219_ref030","article-title":"US Constitution.","journal-title":"art. I, sec. 10, cl. 3"},{"key":"S1935789300003219_ref029","DOI":"10.2105/AJPH.2006.101626","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref021","DOI":"10.1001/jama.299.12.1471","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref016","DOI":"10.1177/003335490812300219","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref026","article-title":"Domain B2: Assessment and Recommendation for Standardization of Memoranda of Understanding (MOUs) for Enhanced Health System Integration.","journal-title":"Briefing Book Re: MOUs in Emergency Settings (on file with the author), October 9, 2007"},{"key":"S1935789300003219_ref025","DOI":"10.1089/bsp.2008.0020","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref011","DOI":"10.1097/01.CCM.0000151067.76074.21","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref039","unstructured":"PACER Model MOU \u00a7 2.4."},{"key":"S1935789300003219_ref005","DOI":"10.1177/003335490912400218","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref007","DOI":"10.1097/DMP.0b013e31817196bf","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref012","DOI":"10.1056/NEJMsa021807","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref018","DOI":"10.1097/DMP.0b013e31819d977c","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref035","unstructured":"PACER Model MOU \u00a7 2.9(a)."},{"key":"S1935789300003219_ref002","DOI":"10.1097/DMP.0b013e31819f1ae2","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref015","DOI":"10.1097/DMP.0b013e318194898d","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref040","unstructured":"PACER Model MOU \u00a7 3.1."},{"key":"S1935789300003219_ref001","article-title":"Guidance for Establishing Standards of Care for Use in Disaster Situations: A Letter Report.","journal-title":"Washington, DC: National Academies of Science; 
2010"},{"key":"S1935789300003219_ref003","DOI":"10.1378/chest.07-2693","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref004","article-title":"Centers for Law and the Public's Health: A Collaborative at Johns Hopkins and Georgetown Universities.","journal-title":"2009 H1N1 (Swine Flu) Legal Preparedness and Response."},{"key":"S1935789300003219_ref006","journal-title":"Model Memorandum of Understanding Between Hospitals During Declared Emergencies."},{"key":"S1935789300003219_ref008","DOI":"10.1016/S0196-0644(99)70224-6","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref022","article-title":"US Constitution.","journal-title":"amend. XIII"},{"key":"S1935789300003219_ref009","DOI":"10.1097/01.CCM.0000151072.17826.72","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref010","DOI":"10.1097/01.BCR.0000155527.76205.A2","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref041","unstructured":"PACER Model MOU \u00a7 3.1(f)."},{"key":"S1935789300003219_ref013","journal-title":"Report 109-377: A Failure of Initiative\u2014Final Report of the Select Bipartisan Committee to Investigate the Preparation for and Response to Hurricane Katrina."},{"key":"S1935789300003219_ref014","first-page":"249","article-title":"Principles and practice of legal triage during public health emergencies.","volume":"64","author":"Hodge","year":"2008","journal-title":"NYU Ann Surv Am L."},{"key":"S1935789300003219_ref017","article-title":"Altered Standards of Care in Mass Casualty Events: Bioterrorism and Other Public Health Emergencies.","journal-title":"Publ No. 05-0043. Rockville, MD; 2005"},{"key":"S1935789300003219_ref019","DOI":"10.2105/AJPH.2009.162677","doi-asserted-by":"publisher"},{"key":"S1935789300003219_ref023","article-title":"US Constitution.","journal-title":"amend. 
V and XIV"},{"key":"S1935789300003219_ref020","journal-title":"Section VII, Non-Discrimination Principles of the Law."},{"key":"S1935789300003219_ref024","first-page":"1009","article-title":"Thirteen ways of looking at Katrina: human and civil rights left behind again.","volume":"81","author":"Quigley","year":"2007","journal-title":"Tulane Law Rev."},{"key":"S1935789300003219_ref027","journal-title":"NIMS Alert: NIMS implementation activities for hospitals and healthcare systems."},{"key":"S1935789300003219_ref028","journal-title":"International Emergency Management Assistance Memorandum of Understanding."}],"container-title":["Disaster Medicine and Public Health Preparedness"],"original-title":[],"link":[{"URL":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/S1935789300003219","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,4,27]],"date-time":"2019-04-27T19:20:17Z","timestamp":1556392817000},"score":null,"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,3]]},"references-count":42,"alternative-id":["S1935789300003219"],"URL":"http://dx.doi.org/10.1001/10-v4n2-hsf10003","relation":{"cites":[]},"ISSN":["1935-7893","1938-744X"],"issn-type":[{"value":"1935-7893","type":"print"},{"value":"1938-744X","type":"electronic"}],"subject":["Public Health, Environmental and Occupational Health"]}]} diff --git a/test/crossref_processing_test.py b/test/crossref_processing_test.py new file mode 100644 index 0000000..bcf52ab --- /dev/null +++ b/test/crossref_processing_test.py @@ -0,0 +1,751 @@ +from oc_ds_converter.crossref.crossref_processing import CrossrefProcessing +import unittest +import os +import json +from oc_ds_converter.lib.csvmanager import CSVManager +from oc_ds_converter.oc_idmanager.oc_data_storage.in_memory_manager import InMemoryStorageManager +from oc_ds_converter.oc_idmanager.oc_data_storage.sqlite_manager import SqliteStorageManager +from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager +from oc_ds_converter.lib.jsonmanager import * +TEST_DIR = os.path.join("test", "crossref_processing") +JSON_FILE = os.path.join(TEST_DIR, "0.json") +TMP_SUPPORT_MATERIAL = os.path.join(TEST_DIR, "tmp_support") +IOD = os.path.join(TEST_DIR, 'iod') +WANTED_DOIS_FOLDER = os.path.join(TEST_DIR, 'wanted_dois') +DATA = os.path.join(TEST_DIR, '40228.json') +PUBLISHERS_MAPPING = os.path.join(TEST_DIR, 'publishers.csv') + + + +class TestCrossrefProcessing(unittest.TestCase): + def test_extract_all_ids_cited(self): + c_processing = CrossrefProcessing() + with open(JSON_FILE, encoding="utf8") as f: + result = json.load(f) + for entity_dict in result['items']: + results_ids = c_processing.extract_all_ids(entity_dict, False) + br = results_ids[0] + expected_br = ['doi:10.2105/ajph.2006.101626', 'doi:10.1001/jama.299.12.1471', 'doi:10.1177/003335490812300219', 'doi:10.1089/bsp.2008.0020', 'doi:10.1097/01.ccm.0000151067.76074.21', 'doi:10.1177/003335490912400218', 'doi:10.1097/dmp.0b013e31817196bf', 'doi:10.1056/nejmsa021807', 'doi:10.1097/dmp.0b013e31819d977c', 'doi:10.1097/dmp.0b013e31819f1ae2', 'doi:10.1097/dmp.0b013e318194898d', 'doi:10.1378/chest.07-2693', 'doi:10.1016/s0196-0644(99)70224-6', 'doi:10.1097/01.ccm.0000151072.17826.72', 'doi:10.1097/01.bcr.0000155527.76205.a2', 'doi:10.2105/ajph.2009.162677'] + self.assertEqual(set(expected_br), set(br)) + c_processing.storage_manager.delete_storage() + + def test_extract_all_ids_cited_redis(self): + 
c_processing = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True)) + with open(JSON_FILE, encoding="utf8") as f: + result = json.load(f) + for entity_dict in result['items']: + results_ids = c_processing.extract_all_ids(entity_dict, False) + br = results_ids[0] + expected_br = ['doi:10.2105/ajph.2006.101626', 'doi:10.1001/jama.299.12.1471', + 'doi:10.1177/003335490812300219', 'doi:10.1089/bsp.2008.0020', + 'doi:10.1097/01.ccm.0000151067.76074.21', 'doi:10.1177/003335490912400218', + 'doi:10.1097/dmp.0b013e31817196bf', 'doi:10.1056/nejmsa021807', + 'doi:10.1097/dmp.0b013e31819d977c', 'doi:10.1097/dmp.0b013e31819f1ae2', + 'doi:10.1097/dmp.0b013e318194898d', 'doi:10.1378/chest.07-2693', + 'doi:10.1016/s0196-0644(99)70224-6', 'doi:10.1097/01.ccm.0000151072.17826.72', + 'doi:10.1097/01.bcr.0000155527.76205.a2', 'doi:10.2105/ajph.2009.162677'] + self.assertEqual(set(expected_br), set(br)) + c_processing.storage_manager.delete_storage() + + def test_get_redis_validity_list(self): + c_processing = CrossrefProcessing() + br = {'doi:10.2105/ajph.2006.101626', 'doi:10.1001/jama.299.12.1471', + 'doi:10.1177/003335490812300219', 'doi:10.1089/bsp.2008.0020', + 'doi:10.1097/01.ccm.0000151067.76074.21', 'doi:10.1177/003335490912400218', + 'doi:10.1097/dmp.0b013e31817196bf', 'doi:10.1056/nejmsa021807', + 'doi:10.1097/dmp.0b013e31819d977c', 'doi:10.1097/dmp.0b013e31819f1ae2', + 'doi:10.1097/dmp.0b013e318194898d', 'doi:10.1378/chest.07-2693', + 'doi:10.1016/s0196-0644(99)70224-6', 'doi:10.1097/01.ccm.0000151072.17826.72', + 'doi:10.1097/01.bcr.0000155527.76205.a2', 'doi:10.2105/ajph.2009.162677'} + br_valid_list = c_processing.get_reids_validity_list(br, "br") + exp_br_valid_list = [] + self.assertEqual(br_valid_list, exp_br_valid_list) + c_processing.storage_manager.delete_storage() + + def test_get_redis_validity_list_redis(self): + c_processing = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True)) + br = {'doi:10.2105/ajph.2006.101626', 'doi:10.1001/jama.299.12.1471', + 'doi:10.1177/003335490812300219', 'doi:10.1089/bsp.2008.0020', + 'doi:10.1097/01.ccm.0000151067.76074.21', 'doi:10.1177/003335490912400218', + 'doi:10.1097/dmp.0b013e31817196bf', 'doi:10.1056/nejmsa021807', + 'doi:10.1097/dmp.0b013e31819d977c', 'doi:10.1097/dmp.0b013e31819f1ae2', + 'doi:10.1097/dmp.0b013e318194898d', 'doi:10.1378/chest.07-2693', + 'doi:10.1016/s0196-0644(99)70224-6', 'doi:10.1097/01.ccm.0000151072.17826.72', + 'doi:10.1097/01.bcr.0000155527.76205.a2', 'doi:10.2105/ajph.2009.162677'} + ra = {'orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'} + br_valid_list = c_processing.get_reids_validity_list(br, "br") + exp_br_valid_list = [] + ra_valid_list = c_processing.get_reids_validity_list(ra, "ra") + self.assertEqual(br_valid_list, exp_br_valid_list) + exp_ra_valid_list = [] + self.assertEqual(ra_valid_list, exp_ra_valid_list) + c_processing.storage_manager.delete_storage() + + def test_get_redis_validity_dict_w_fakeredis_db_values_sqlite(self): + c_processing = CrossrefProcessing() + c_processing.BR_redis.set('doi:10.2105/ajph.2006.101626', "omid:1") + c_processing.RA_redis.set('orcid:0000-0002-8090-6886', "omid:2") + + + br = {'doi:10.2105/ajph.2006.101626', 'doi:10.1001/jama.299.12.1471', + 'doi:10.1177/003335490812300219'} + ra = {'orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'} + + br_validity_dict = c_processing.get_reids_validity_list(br, "br") + exp_br_valid_list = ['doi:10.2105/ajph.2006.101626'] + ra_validity_dict = c_processing.get_reids_validity_list(ra, "ra") + 
exp_ra_valid_list = ['orcid:0000-0002-8090-6886'] + self.assertEqual(br_validity_dict, exp_br_valid_list) + self.assertEqual(ra_validity_dict, exp_ra_valid_list) + + c_processing.storage_manager.delete_storage() + + c_processing.BR_redis.delete('doi:10.2105/ajph.2006.101626') + c_processing.RA_redis.delete('orcid:0000-0002-8090-6886') + + + def test_get_redis_validity_dict_w_fakeredis_db_values_redis(self): + c_processing = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True)) + c_processing.BR_redis.set('doi:10.2105/ajph.2006.101626', "omid:1") + c_processing.RA_redis.set('orcid:0000-0002-8090-6886', "omid:2") + + + br = {'doi:10.2105/ajph.2006.101626', 'doi:10.1001/jama.299.12.1471', + 'doi:10.1177/003335490812300219'} + ra = {'orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'} + + br_validity_dict = c_processing.get_reids_validity_list(br, "br") + exp_br_valid_list = ['doi:10.2105/ajph.2006.101626'] + ra_validity_dict = c_processing.get_reids_validity_list(ra, "ra") + exp_ra_valid_list = ['orcid:0000-0002-8090-6886'] + self.assertEqual(br_validity_dict, exp_br_valid_list) + self.assertEqual(ra_validity_dict, exp_ra_valid_list) + + c_processing.storage_manager.delete_storage() + + c_processing.BR_redis.delete('doi:10.2105/ajph.2006.101626') + c_processing.RA_redis.delete('orcid:0000-0002-8090-6886') + + def test_validated_as_default(self): + """ + Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: + string of the identifier, the method "validated_as" returns: + - True if the id was already validated as valid + - False if the id was already validated as invalid + - None if the id was not validated before + The procedure is tested + - With default storage manager (sqlite) without a pre-existent db associated + """ + + c_processing = CrossrefProcessing() + validate_as_none = c_processing.validated_as({"schema":"doi", "identifier": "doi:10.1001/10-v4n2-hsf10003"}) + self.assertEqual(validate_as_none, None) + c_processing.storage_manager.delete_storage() + + def test_validated_as_default_redis(self): + """ + Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: + string of the identifier, the method "validated_as" returns: + - True if the id was already validated as valid + - False if the id was already validated as invalid + - None if the id was not validated before + The procedure is tested + - With redis storage manager without a pre-existent db associated + """ + c_processing = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True)) + validate_as_none = c_processing.validated_as({"schema": "doi", "identifier": "doi:10.1001/10-v4n2-hsf10003"}) + self.assertEqual(validate_as_none, None) + c_processing.storage_manager.delete_storage() + + def test_validated_as_sqlite(self): + """ + Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: + string of the identifier, the method "validated_as" returns: + - True if the id was already validated as valid + - False if the id was already validated as invalid + - None if the id was not validated before + The procedure is tested + - With sqlite storage manager without a pre-existent db associated + - With sqlite storage manager and a pre-existent db associated + """ + + db_path = os.path.join(TMP_SUPPORT_MATERIAL, "db_path.db") + + sqlite_man = SqliteStorageManager(db_path) + valid_doi_not_in_db = {"identifier":"doi:10.1001/2012.jama.10158", "schema":"doi"} + 
valid_doi_in_db = {"identifier":"doi:10.1001/2012.jama.10368", "schema":"doi"} + invalid_doi_in_db = {"identifier":"doi:10.1001/2012.jama.1036", "schema":"doi"} + sqlite_man.set_value(valid_doi_in_db["identifier"], True) + sqlite_man.set_value(invalid_doi_in_db["identifier"], False) + + # New class instance to check the correct task management with a sqlite db in input + c_processing_sql = CrossrefProcessing(storage_manager=sqlite_man) + validated_as_True = c_processing_sql.validated_as(valid_doi_in_db) + validated_as_False = c_processing_sql.validated_as(invalid_doi_in_db) + not_validated = c_processing_sql.validated_as(valid_doi_not_in_db) + + self.assertEqual(validated_as_True, True) + self.assertEqual(validated_as_False, False) + self.assertEqual(not_validated, None) + + c_processing_sql.storage_manager.delete_storage() + + def test_validated_as_inmemory(self): + """ + Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: + string of the identifier, the method "validated_as" returns: + - True if the id was already validated as valid + - False if the id was already validated as invalid + - None if the id was not validated before + The procedure is tested + - With in Memory + Json storage manager and a pre-existent db associated + - With in Memory + Json storage manager without a pre-existent db associated + """ + db_json_path = os.path.join(TMP_SUPPORT_MATERIAL, "db_path.json") + + inmemory_man = InMemoryStorageManager(db_json_path) + valid_doi_not_in_db = {"identifier": "doi:10.1001/2012.jama.10158", "schema": "doi"} + valid_doi_in_db = {"identifier": "doi:10.1001/2012.jama.10368", "schema": "doi"} + invalid_doi_in_db = {"identifier": "doi:10.1001/2012.jama.1036", "schema": "doi"} + inmemory_man.set_value(valid_doi_in_db["identifier"], True) + inmemory_man.set_value(invalid_doi_in_db["identifier"], False) + + c_processing = CrossrefProcessing(storage_manager=inmemory_man) + validated_as_True = c_processing.validated_as(valid_doi_in_db) + validated_as_False = c_processing.validated_as(invalid_doi_in_db) + not_validated = c_processing.validated_as(valid_doi_not_in_db) + + self.assertEqual(validated_as_True, True) + self.assertEqual(validated_as_False, False) + self.assertEqual(not_validated, None) + + c_processing.storage_manager.delete_storage() + + def test_validated_as_redis(self): + """ + Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: + string of the identifier, the method "validated_as" returns: + - True if the id was already validated as valid + - False if the id was already validated as invalid + - None if the id was not validated before + The procedure is tested + - With REDIS storage manager and a pre-existent db associated + - With REDIS storage manager without a pre-existent db associated + """ + + redis_man = RedisStorageManager(testing=True) + valid_doi_not_in_db = {"identifier": "doi:10.1001/2012.jama.10158", "schema": "doi"} + valid_doi_in_db = {"identifier": "doi:10.1001/2012.jama.10368", "schema": "doi"} + invalid_doi_in_db = {"identifier": "doi:10.1001/2012.jama.1036", "schema": "doi"} + redis_man.set_value(valid_doi_in_db["identifier"], True) + redis_man.set_value(invalid_doi_in_db["identifier"], False) + + # New class instance to check the correct task management with a redis manager using a db with data + c_processing_redis = CrossrefProcessing(storage_manager=redis_man) + validated_as_True = c_processing_redis.validated_as(valid_doi_in_db) + validated_as_False = 
c_processing_redis.validated_as(invalid_doi_in_db) + not_validated = c_processing_redis.validated_as(valid_doi_not_in_db) + self.assertEqual(validated_as_True, True) + self.assertEqual(validated_as_False, False) + self.assertEqual(not_validated, None) + c_processing_redis.storage_manager.delete_storage() + + def test_get_id_manager(self): + """Check that, given in input the string of a schema (e.g.:'pmid') or an id with a prefix (e.g.: 'pmid:12334') + and a dictionary mapping the strings of the schemas to their id managers, the method returns the correct + id manager. Note that each instance of the Preprocessing class needs its own instances of the id managers, + in order to avoid conflicts while validating data""" + + c_processing = CrossrefProcessing() + id_man_dict = c_processing.venue_id_man_dict + + issn_id = "issn:0003-987X" + issn_string = "issn" + issn_man_exp = c_processing.get_id_manager(issn_id, id_man_dict) + issn_man_exp_2 = c_processing.get_id_manager(issn_string, id_man_dict) + + #check that the idmanager for the issn was returned and that it works as expected + self.assertTrue(issn_man_exp.is_valid(issn_id)) + self.assertTrue(issn_man_exp_2.is_valid(issn_id)) + + def test_csv_creator(self): + c_processing = CrossrefProcessing(orcid_index=IOD, doi_csv=WANTED_DOIS_FOLDER, publishers_filepath=None) + data = load_json(DATA, None) + output = list() + for item in data['items']: + tabular_data = c_processing.csv_creator(item) + if tabular_data: + output.append(tabular_data) + expected_output = [ + {'id': 'doi:10.47886/9789251092637.ch7', 'title': 'Freshwater, Fish and the Future: Proceedings of the Global Cross-Sectoral Conference', 'author': '', 'pub_date': '2016', 'venue': 'Freshwater, Fish and the Future: Proceedings of the Global Cross-Sectoral Conference', 'volume': '', 'issue': '', 'page': '', 'type': 'book chapter', 'publisher': 'American Fisheries Society [crossref:460]', 'editor': 'Lymer, David; Food and Agriculture Organization of the United Nations Fisheries and Aquaculture Department Viale delle Terme di Caracalla Rome 00153 Italy; Marttin, Felix; Marmulla, Gerd; Bartley, Devin M.'}, + {'id': 'doi:10.9799/ksfan.2012.25.1.069', 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0002-6227-4053]; Mun, Ji-Hye [orcid:0000-0002-6227-4053]; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': ''}, + {'id': 'doi:10.9799/ksfan.2012.25.1.105', 'title': 'A Study on Dietary Habit and Eating Snack Behaviors of Middle School Students with Different Obesity Indexes in Chungnam Area', 'author': 'Kim, Myung-Hee; Seo, Jin-Seon; Choi, Mi-Kyeong [orcid:0000-0002-6227-4053]; Kim, Eun-Young', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '105-115', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': ''}, + {'id': 'doi:10.9799/ksfan.2012.25.1.123', 'title': 'The Protective Effects of Chrysanthemum cornarium L. var. 
spatiosum Extract on HIT-T15 Pancreatic β-Cells against Alloxan-induced Oxidative Stress', 'author': 'Kim, In-Hye; Cho, Kang-Jin; Ko, Jeong-Sook; Kim, Jae-Hyun; Om, Ae-Son', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '123-131', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': ''} + ] + self.assertEqual(output, expected_output) + + def test_csv_creator_cited(self): + c_processing_cited = CrossrefProcessing(orcid_index=IOD, publishers_filepath=None, citing=False) + with open(JSON_FILE, encoding="utf8") as f: + result = json.load(f) + output = list() + for item in result['items']: + if item.get("reference"): + # filtering out entities without citations + has_doi_references = [x for x in item["reference"] if x.get("DOI")] + if has_doi_references: + for reference_dict in has_doi_references: + tabular_data = c_processing_cited.csv_creator(reference_dict) + if tabular_data: + output.append(tabular_data) + expected_output =[ + {'id': 'doi:10.2105/ajph.2006.101626', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1001/jama.299.12.1471', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1177/003335490812300219', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1089/bsp.2008.0020', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1097/01.ccm.0000151067.76074.21', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1177/003335490912400218', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1097/dmp.0b013e31817196bf', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1056/nejmsa021807', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1097/dmp.0b013e31819d977c', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1097/dmp.0b013e31819f1ae2', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1097/dmp.0b013e318194898d', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1378/chest.07-2693', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1016/s0196-0644(99)70224-6', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1097/01.ccm.0000151072.17826.72', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 
'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.1097/01.bcr.0000155527.76205.a2', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, + {'id': 'doi:10.2105/ajph.2009.162677', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}] + self.assertEqual(output, expected_output) + + def test_get_pages(self): + item = { + 'page': '469-476' + } + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + pages = crossref_processor.get_crossref_pages(item) + self.assertEqual(pages, '469-476') + + def test_get_pages_right_letter(self): + item = { + 'page': 'G22' + } + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + pages = crossref_processor.get_crossref_pages(item) + self.assertEqual(pages, 'G22-G22') + + def test_get_pages_wrong_letter(self): + item = { + 'page': '583b-584' + } + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + pages = crossref_processor.get_crossref_pages(item) + self.assertEqual(pages, '583-584') + + def test_get_pages_roman_letters(self): + item = { + 'page': 'iv-l' + } + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + pages = crossref_processor.get_crossref_pages(item) + self.assertEqual(pages, 'iv-l') + + def test_get_pages_non_roman_letters(self): + item = { + 'page': 'kj-hh' + } + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + pages = crossref_processor.get_crossref_pages(item) + self.assertEqual(pages, '') + + def test_load_publishers_mapping(self): + output = CrossrefProcessing.load_publishers_mapping(publishers_filepath=PUBLISHERS_MAPPING) + expected_output = { + '1': {'name': 'Annals of Family Medicine', 'prefixes': {'10.1370'}}, + '2': {'name': 'American Association of Petroleum Geologists AAPG/Datapages', 'prefixes': {'10.15530', '10.1306'}}, + '3': {'name': 'American Association of Physics Teachers (AAPT)','prefixes': {'10.1119'}}, + '6': {'name': 'American College of Medical Physics (ACMP)','prefixes': {'10.1120'}}, + '9': {'name': 'Allen Press', 'prefixes': {'10.1043'}}, + '10': {'name': 'American Medical Association (AMA)', 'prefixes': {'10.1001'}}, + '11': {'name': 'American Economic Association', 'prefixes': {'10.1257'}}, + '460': {'name': 'American Fisheries Society', 'prefixes': {'10.1577', '10.47886'}} + } + self.assertEqual(output, expected_output) + + def test_get_publisher_name(self): + # The item's member is in the publishers' mapping + item = { + 'publisher': 'American Fisheries Society', + 'DOI': '10.47886\/9789251092637.ch7', + 'prefix': '10.47886', + 'member': '460' + } + doi = '10.47886/9789251092637.ch7' + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + publisher_name = crossref_processor.get_publisher_name(doi, item) + self.assertEqual(publisher_name, 'American Fisheries Society [crossref:460]') + + def test_get_publisher_name_no_member(self): + # The item has no member, but the DOI prefix is the publishers' mapping + item = { + 'publisher': 'American Fisheries Society', + 'DOI': '10.47886/9789251092637.ch7', + 'prefix': '10.47886' + } + doi = 
'10.47886/9789251092637.ch7' + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + publisher_name = crossref_processor.get_publisher_name(doi, item) + self.assertEqual(publisher_name, 'American Fisheries Society [crossref:460]') + + def test_get_venue_name(self): + item = { + 'container-title': ['Cerebrospinal Fluid [Working Title]'], + } + row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': ''} + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + venue_name = crossref_processor.get_venue_name(item, row) + self.assertEqual(venue_name, 'Cerebrospinal Fluid [Working Title]') + + def test_get_venue_name_with_ISSN(self): + item = { + "container-title": ["Disaster Medicine and Public Health Preparedness"], + "ISSN": ["1935-7893", "1938-744X"] + } + row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', + 'type': 'journal article', 'publisher': '', 'editor': ''} + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + venue_name = crossref_processor.get_venue_name(item, row) + self.assertEqual(venue_name, 'Disaster Medicine and Public Health Preparedness [issn:1935-7893 issn:1938-744X]') + + def test_find_crossref_orcid(self): + cp = CrossrefProcessing() + inp = '0000-0001-9759-3938' + out = cp.find_crossref_orcid(inp) + exp = "orcid:0000-0001-9759-3938" + self.assertEqual(out, exp) + + inp_invalid_id = '5500-0001-9759-3938' + out_invalid_id = cp.find_crossref_orcid(inp_invalid_id) + exp_invalid_id = "" + self.assertEqual(out_invalid_id, exp_invalid_id) + + cp.storage_manager.delete_storage() + + # set a valid id as invalid in storage, so to check that the api check is + # avoided if the info is already in storage + cp = CrossrefProcessing() + cp.storage_manager.set_value("orcid:0000-0001-9759-3938", False) + + inp = '0000-0001-9759-3938' + out = cp.find_crossref_orcid(inp) + exp = "" + self.assertEqual(out, exp) + cp.storage_manager.delete_storage() + + cp = CrossrefProcessing() + cp.storage_manager.set_value("orcid:0000-0001-9759-3938", True) + inp = '0000-0001-9759-3938' + out = cp.find_crossref_orcid(inp) + exp = "orcid:0000-0001-9759-3938" + self.assertEqual(out, exp) + cp.storage_manager.delete_storage() + + def test_report_series_venue_id(self): + crossref_processor = CrossrefProcessing(orcid_index=IOD, doi_csv=WANTED_DOIS_FOLDER, publishers_filepath=None) + items = {'items': [{ + 'DOI': '10.1007/978-3-030-00668-6_8', + 'container-title': ["troitel'stvo: nauka i obrazovanie [Construction: Science and Education]"], + 'ISSN': '2305-5502', + 'type': 'report-series' + }]} + crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING) + output = list() + for item in items['items']: + output.append(crossref_processor.csv_creator(item)) + expected_output = [{'id': 'doi:10.1007/978-3-030-00668-6_8', 'title': '', 'author': '', 'pub_date': '', 'venue': "troitel'stvo: nauka i obrazovanie [Construction: Science and Education] [issn:2305-5502]", 'volume': '', 'issue': '', 'page': '', 'type': 'report series', 'publisher': '', 'editor': ''}] + self.assertEqual(output, expected_output) + + def test_report_series_br_id(self): + crossref_processor = CrossrefProcessing(orcid_index=IOD, 
+    def test_report_series_venue_id(self):
+        crossref_processor = CrossrefProcessing(orcid_index=IOD, doi_csv=WANTED_DOIS_FOLDER, publishers_filepath=None)
+        items = {'items': [{
+            'DOI': '10.1007/978-3-030-00668-6_8',
+            'container-title': ["troitel'stvo: nauka i obrazovanie [Construction: Science and Education]"],
+            'ISSN': '2305-5502',
+            'type': 'report-series'
+        }]}
+        crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING)
+        output = list()
+        for item in items['items']:
+            output.append(crossref_processor.csv_creator(item))
+        expected_output = [{'id': 'doi:10.1007/978-3-030-00668-6_8', 'title': '', 'author': '', 'pub_date': '', 'venue': "troitel'stvo: nauka i obrazovanie [Construction: Science and Education] [issn:2305-5502]", 'volume': '', 'issue': '', 'page': '', 'type': 'report series', 'publisher': '', 'editor': ''}]
+        self.assertEqual(output, expected_output)
+
+    def test_report_series_br_id(self):
+        crossref_processor = CrossrefProcessing(orcid_index=IOD, doi_csv=WANTED_DOIS_FOLDER, publishers_filepath=None)
+        items = {'items': [{
+            'DOI': '10.1007/978-3-030-00668-6_8',
+            'container-title': [],
+            'ISSN': '2305-5502',
+            'type': 'report-series'
+        }]}
+        crossref_processor = CrossrefProcessing(orcid_index=None, doi_csv=None, publishers_filepath=PUBLISHERS_MAPPING)
+        output = list()
+        for item in items['items']:
+            output.append(crossref_processor.csv_creator(item))
+        expected_output = [{'id': 'doi:10.1007/978-3-030-00668-6_8 issn:2305-5502', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': 'report series', 'publisher': '', 'editor': ''}]
+        self.assertEqual(output, expected_output)
+
+    def test_get_agents_strings_list(self):
+        authors_list = [
+            {
+                'given': 'Myung-Hee',
+                'family': 'Kim',
+                'affiliation': [],
+                "role": "author"
+            },
+            {
+                'given': 'Jin-Seon',
+                'family': 'Seo',
+                'affiliation': [],
+                "role": "author"
+            },
+            {
+                'given': 'Mi-Kyeong',
+                'family': 'Choi',
+                'affiliation': [],
+                "role": "author"
+            },
+            {
+                'given': 'Eun-Young',
+                'family': 'Kim',
+                'affiliation': [],
+                "role": "author"
+            }
+        ]
+        crossref_processor = CrossrefProcessing(IOD, WANTED_DOIS_FOLDER)
+        authors_strings_list, _ = crossref_processor.get_agents_strings_list('10.9799/ksfan.2012.25.1.105', authors_list)
+        expected_authors_list = ['Kim, Myung-Hee', 'Seo, Jin-Seon', 'Choi, Mi-Kyeong [orcid:0000-0002-6227-4053]', 'Kim, Eun-Young']
+        self.assertEqual(authors_strings_list, expected_authors_list)
+
+    def test_get_agents_strings_list_same_family(self):
+        # Two authors have the same family name and the same given name initials
+        authors_list = [
+            {
+                'given': 'Mi-Kyeong',
+                'family': 'Choi',
+                'affiliation': [],
+                "role": "author"
+            },
+            {
+                'given': 'Mi-Hong',
+                'family': 'Choi',
+                'affiliation': [],
+                "role": "author"
+            }
+        ]
+        crossref_processor = CrossrefProcessing(IOD, WANTED_DOIS_FOLDER)
+        authors_strings_list, _ = crossref_processor.get_agents_strings_list('10.9799/ksfan.2012.25.1.105', authors_list)
+        expected_authors_list = ['Choi, Mi-Kyeong [orcid:0000-0002-6227-4053]', 'Choi, Mi-Hong']
+        self.assertEqual(authors_strings_list, expected_authors_list)
+
+    def test_get_agents_strings_list_homonyms(self):
+        # Two authors have the same family name and the same given name
+        authors_list = [
+            {
+                'given': 'Mi-Kyeong',
+                'family': 'Choi',
+                'affiliation': [],
+                "role": "author"
+            },
+            {
+                'given': 'Mi-Kyeong',
+                'family': 'Choi',
+                'affiliation': [],
+                "role": "author"
+            }
+        ]
+        crossref_processor = CrossrefProcessing(IOD, WANTED_DOIS_FOLDER)
+        authors_strings_list, _ = crossref_processor.get_agents_strings_list('10.9799/ksfan.2012.25.1.105', authors_list)
+        expected_authors_list = ['Choi, Mi-Kyeong', 'Choi, Mi-Kyeong']
+        self.assertEqual(authors_strings_list, expected_authors_list)
+
+    def test_get_agents_strings_list_inverted_names(self):
+        # One author with an ORCID has, as given name, the family name of another author
+        authors_list = [
+            {
+                'given': 'Choi',
+                'family': 'Mi-Kyeong',
+                'affiliation': [],
+                "role": "author"
+            },
+            {
+                'given': 'Mi-Hong',
+                'family': 'Choi',
+                'affiliation': [],
+                "role": "author"
+            }
+        ]
+        crossref_processor = CrossrefProcessing(IOD, WANTED_DOIS_FOLDER)
+        authors_strings_list, _ = crossref_processor.get_agents_strings_list('10.9799/ksfan.2012.25.1.105', authors_list)
+        expected_authors_list = ['Mi-Kyeong, Choi', 'Choi, Mi-Hong']
+        self.assertEqual(authors_strings_list, expected_authors_list)
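The four get_agents_strings_list tests above all revolve around ORCID attribution from the orcid index: an index entry such as 'Choi, Mi-Kyeong [0000-0002-6227-4053]' is attached only when it matches exactly one agent, so perfect homonyms get no ORCID and swapped given/family names are not conflated. A toy version of that disambiguation step (an illustration only; the real matching also copes with initials and with overlapping surnames, as the next test shows):

    def attach_orcid(agents, index_entry):
        # index_entry looks like 'Choi, Mi-Kyeong [0000-0002-6227-4053]'
        name, orcid = index_entry.rsplit(' [', 1)
        orcid = orcid.rstrip(']')
        matches = [a for a in agents if f"{a['family']}, {a['given']}" == name]
        # Attach only on an unambiguous match: homonyms stay ORCID-less
        if len(matches) == 1:
            matches[0]['orcid'] = orcid
        return agents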
+    def test_get_agents_strings_list_overlapping_surnames(self):
+        # The surname of one author is included in the surname of another
+        authors_list = [
+            {
+                "given": "Puvaneswari",
+                "family": "Paravamsivam",
+                "sequence": "first",
+                "affiliation": [],
+                "role": "author"
+            },
+            {
+                "given": "Chua Kek",
+                "family": "Heng",
+                "sequence": "additional",
+                "affiliation": [],
+                "role": "author"
+            },
+            {
+                "given": "Sri Nurestri Abdul",
+                "family": "Malek",
+                "sequence": "additional",
+                "affiliation": [],
+                "role": "author"
+            },
+            {
+                "given": "Vikineswary",
+                "family": "Sabaratnam",
+                "sequence": "additional",
+                "affiliation": [],
+                "role": "author"
+            },
+            {
+                "given": "Ravishankar Ram",
+                "family": "M",
+                "sequence": "additional",
+                "affiliation": [],
+                "role": "author"
+            },
+            {
+                "given": "Sri Nurestri Abdul",
+                "family": "Malek",
+                "sequence": "additional",
+                "affiliation": [],
+                "role": "editor"
+            },
+            {
+                "given": "Umah Rani",
+                "family": "Kuppusamy",
+                "sequence": "additional",
+                "affiliation": [],
+                "role": "author"
+            }
+        ]
+        crossref_processor = CrossrefProcessing(None, None)
+        csv_manager = CSVManager()
+        csv_manager.data = {'10.9799/ksfan.2012.25.1.105': {'Malek, Sri Nurestri Abdul [0000-0001-6278-8559]'}}
+        crossref_processor.orcid_index = csv_manager
+        authors_strings_list, editors_strings_list = crossref_processor.get_agents_strings_list('10.9799/ksfan.2012.25.1.105', authors_list)
+        expected_authors_list = ['Paravamsivam, Puvaneswari', 'Heng, Chua Kek', 'Malek, Sri Nurestri Abdul [orcid:0000-0001-6278-8559]', 'Sabaratnam, Vikineswary', 'M, Ravishankar Ram', 'Kuppusamy, Umah Rani']
+        expected_editors_list = ['Malek, Sri Nurestri Abdul [orcid:0000-0001-6278-8559]']
+        self.assertEqual((authors_strings_list, editors_strings_list), (expected_authors_list, expected_editors_list))
+
+    def test_id_worker(self):
+        field_issn = 'ISSN 1050-124X'
+        field_isbn = ['978-1-56619-909-4']
+        issn_list = list()
+        isbn_list = list()
+        CrossrefProcessing.id_worker(field_issn, issn_list, CrossrefProcessing.issn_worker)
+        CrossrefProcessing.id_worker(field_isbn, isbn_list, CrossrefProcessing.isbn_worker)
+        expected_issn_list = ['issn:1050-124X']
+        expected_isbn_list = ['isbn:9781566199094']
+        self.assertEqual((issn_list, isbn_list), (expected_issn_list, expected_isbn_list))
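test_id_worker documents the worker convention used throughout the processor: id_worker receives a raw field (a bare string or a list of strings), a target list, and a worker callback; the worker normalizes each value ('ISSN 1050-124X' becomes 'issn:1050-124X', a hyphenated ISBN becomes 'isbn:9781566199094') and appends it to the target list. A hedged usage sketch with a made-up worker, assuming id_worker invokes the callback as worker(value, target_list), as the test suggests:

    def toy_doi_worker(raw_value, id_list):
        # Same convention as issn_worker/isbn_worker: normalize, prefix, append
        doi = raw_value.strip().lower()
        if doi.startswith('10.'):
            id_list.append(f'doi:{doi}')

    doi_list = []
    CrossrefProcessing.id_worker('10.1577/EXAMPLE', doi_list, toy_doi_worker)
    # doi_list would now be ['doi:10.1577/example']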
+    def test_to_validated_id_list(self):
+        # NOTE: in tests using the sqlite storage method, avoid deleting the storage
+        # while still using the same CrossrefProcessing() instance; otherwise the
+        # process would try to store data in a filepath that has just been deleted,
+        # with no new connection created after it.
+
+        # Two options: 1) instantiate CrossrefProcessing only once at the beginning
+        # and delete the storage only at the end; 2) create a new CrossrefProcessing
+        # instance at every check and delete the storage each time after the check is done.
+
+        cp = CrossrefProcessing()
+        # CASE_1: is valid
+        inp_1 = {'id': 'doi:10.13039/100005522', 'schema': 'doi'}
+        out_1 = cp.to_validated_id_list(inp_1)
+        exp_1 = ['doi:10.13039/100005522']
+        self.assertEqual(out_1, exp_1)
+        cp.storage_manager.delete_storage()
+
+        cp = CrossrefProcessing()
+        # CASE_2: is invalid
+        inp_2 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'}
+        out_2 = cp.to_validated_id_list(inp_2)
+        exp_2 = []
+        self.assertEqual(out_2, exp_2)
+
+        cp = CrossrefProcessing()
+        # CASE_3: valid orcid
+        inp_3 = {'id': 'orcid:0000-0003-4082-1500', 'schema': 'orcid'}
+        out_3 = cp.to_validated_id_list(inp_3)
+        exp_3 = ['orcid:0000-0003-4082-1500']
+        self.assertEqual(out_3, exp_3)
+        cp.storage_manager.delete_storage()
+
+        cp = CrossrefProcessing()
+        # CASE_4: invalid doi in self._redis_values_br
+        inp_4 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'}
+        cp._redis_values_br.append(inp_4['id'])
+        out_4 = cp.to_validated_id_list(inp_4)
+        exp_4 = ['doi:10.1089/bsp.2008.002']
+        self.assertEqual(out_4, exp_4)
+        value = cp.tmp_doi_m.storage_manager.get_value('doi:10.1089/bsp.2008.002')
+        self.assertEqual(value, True)
+        cp.storage_manager.delete_storage()
+
+    def test_to_validated_id_list_redis(self):
+        cp = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True))
+        # CASE_1: is valid
+        inp_1 = {'id': 'doi:10.13039/100005522', 'schema': 'doi'}
+        out_1 = cp.to_validated_id_list(inp_1)
+        exp_1 = ['doi:10.13039/100005522']
+        self.assertEqual(out_1, exp_1)
+        cp.storage_manager.delete_storage()
+
+        cp = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True))
+        # CASE_2: is invalid
+        inp_2 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'}
+        out_2 = cp.to_validated_id_list(inp_2)
+        exp_2 = []
+        self.assertEqual(out_2, exp_2)
+
+        cp = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True))
+        # CASE_3: valid orcid
+        inp_3 = {'id': 'orcid:0000-0003-4082-1500', 'schema': 'orcid'}
+        out_3 = cp.to_validated_id_list(inp_3)
+        exp_3 = ['orcid:0000-0003-4082-1500']
+        self.assertEqual(out_3, exp_3)
+        cp.storage_manager.delete_storage()
+
+        cp = CrossrefProcessing(storage_manager=RedisStorageManager(testing=True))
+        # CASE_4: invalid doi in self._redis_values_br
+        inp_4 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'}
+        cp._redis_values_br.append(inp_4['id'])
+        out_4 = cp.to_validated_id_list(inp_4)
+        exp_4 = ['doi:10.1089/bsp.2008.002']
+        self.assertEqual(out_4, exp_4)
+        value = cp.tmp_doi_m.storage_manager.get_value('doi:10.1089/bsp.2008.002')
+        self.assertEqual(value, True)
+        cp.storage_manager.delete_storage()
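CASE_4 in both variants pins down the Redis shortcut: an identifier already present in self._redis_values_br (that is, already known to META as a bibliographic resource) is accepted without any API round-trip, and its validity is recorded through the temporary DOI manager so that later look-ups hit the cache. A rough sketch of the decision order these assertions imply, assuming CrossrefProcessing mirrors the validated_as() cache look-up shown for JalcProcessing, and with validate_via_api as a purely hypothetical stand-in for the live check:

    def to_validated_id_list_sketch(processor, id_dict):
        oc_id = id_dict['id']
        # 1) Prior verdicts: temporary storage first, then the main storage
        verdict = processor.validated_as(oc_id)
        # 2) Ids in the Redis BR set are trusted outright and cached as valid
        if verdict is None and oc_id in processor._redis_values_br:
            processor.tmp_doi_m.storage_manager.set_value(oc_id, True)
            verdict = True
        # 3) Only then would a live validation round-trip run
        if verdict is None:
            verdict = validate_via_api(oc_id)  # hypothetical helper
        return [oc_id] if verdict else []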
"issn:1880-3024", "jid:jdsa"] - expected2 = ["issn:1880-3016", "issn:1880-3024"] - - jalc_processor = JalcProcessing() - outp = jalc_processor.to_validated_venue_id_list(id_dict_list_1) - # the JID id is not valid - outp2 = jalc_processor.to_validated_venue_id_list(id_dict_list_2) - - self.assertEqual(outp, expected1) - self.assertEqual(outp2, expected2) def test_get_venue_without_full(self): item_dict = { @@ -4510,6 +4470,15 @@ def test_get_ja_without_japanese(self): expected_out = [{"lang": "en", "title": "Ozone Measurement by MT-135 Rocket"}] self.assertEqual(en, expected_out) + def test_to_validated_id_list(self): + inp_1 = 'doi:10.13039/100005522' + j_p = JalcProcessing() + # CASE1_1: No already validated ids + 1 id to be validated, which is valid + out_1 = j_p.to_validated_id_list(inp_1) + exp_1 = ['doi:10.13039/100005522'] + self.assertEqual(out_1, exp_1) + j_p.storage_manager.delete_storage() + if __name__ == '__main__': unittest.main() \ No newline at end of file