Skip to content

Commit

Permalink
New strategy to check unique citing entities
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanhb committed Sep 9, 2024
1 parent fcceb6e commit c10ae3f
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 16 deletions.
2 changes: 1 addition & 1 deletion config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ services=COCI,POCI,CROCI,DOCI,JOCI,INDEX
# Available identifiers type
identifiers=doi,pmid,omid
# All BR IDs handled in META
br_ids=doi,issn,isbn,pmid,pmcid,url,wikidata,wikipedia,jid,arxiv
br_ids=doi,issn,isbn,pmid,pmcid,url,wikidata,wikipedia,jid,arxiv,openalex
# All RA IDs handled in META
ra_ids=crossref,orcid,viaf,wikidata,ror
# Redis all citations in OpenCitations INDEX – <CITED-OMID>:[ <CITING-OMID-1>, <CITING-OMID-2>, ..., <CITING-OMID-N> ]
Expand Down
59 changes: 44 additions & 15 deletions scripts/anyid_citation_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
#
# v2.2
# New strategy to check unique citing entities
#
# v2.1
# Citations map could be created also using a CSV dump instead of Redis only
#
Expand All @@ -38,15 +41,14 @@

# Raise the csv module's per-field size limit to sys.maxsize.
csv.field_size_limit(sys.maxsize)
# Configuration object returned by the project's get_config() helper.
config = get_config()
# Entries of the "br_ids" option in the "cnc" section, split on commas.
# Kept at module level so the functions below can share the same list.
conf_br_ids = config.get("cnc", "br_ids").split(",")

'''
To create the omid map using the META BRs index (in CSV)
The META BRs index should be previously generated using 'meta2redis' command
'''
def read_omid_map(f_omidmap):
global config

conf_br_ids = config.get("cnc", "br_ids").split(",")
global conf_br_ids

omid_map = defaultdict(set)
with open(f_omidmap, mode='r') as file:
Expand Down Expand Up @@ -93,19 +95,46 @@ def read_omid_citations_index(f_omid_citations_index):
'''
def count_unique_cits(citing_omids, omid_map, br_id_prefixes=None):
    """Count the citing entities that introduce only unseen identifiers.

    Each citing OMID is looked up in ``omid_map`` to obtain its identifiers
    (ANYIDs). A citing entity is counted when none of its identifiers that
    start with one of the known prefixes has already been met while
    processing an earlier citing entity of the same call.

    This replaces the previous strategy, which kept a list of the accepted
    identifier sets and intersected every new citing entity with each of
    them.

    Args:
        citing_omids: iterable of citing OMIDs.
        omid_map: mapping from an OMID to a collection of its identifiers;
            it is indexed with ``omid_map[omid]``.
        br_id_prefixes: iterable of identifier prefixes to consider. When
            ``None`` (the default) the module-level ``conf_br_ids`` list is
            used.

    Returns:
        The number of citing entities considered unique.
    """
    if br_id_prefixes is None:
        # Reading a module-level name needs no ``global`` declaration.
        br_id_prefixes = conf_br_ids

    # One set of already-seen identifiers per prefix. This must be ``set()``:
    # a bare ``{}`` is an empty dict and has no ``add`` method.
    seen_by_prefix = {id_pref: set() for id_pref in br_id_prefixes}

    cits_count = 0
    for a_citing_omid in citing_omids:
        is_unique_citing = True
        for a_citing_anyid in omid_map[a_citing_omid]:
            for id_pref, seen in seen_by_prefix.items():
                if not a_citing_anyid.startswith(id_pref):
                    continue
                if a_citing_anyid in seen:
                    is_unique_citing = False
                else:
                    # Identifiers are recorded even when the entity turns
                    # out to be a duplicate, so later entities sharing any
                    # of them are treated as duplicates as well.
                    seen.add(a_citing_anyid)

        # Entities with no identifier under a known prefix count as unique.
        if is_unique_citing:
            cits_count += 1

    return cits_count


def main():
Expand Down

0 comments on commit c10ae3f

Please sign in to comment.