Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Revision of Zenodo update script #3043

Closed
wants to merge 2 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 56 additions & 53 deletions tools/update_zenodo.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,15 @@
#!/usr/bin/env python3
"""Update and sort the creators list of the zenodo record."""
import sys
import shutil
from pathlib import Path
import json
from fuzzywuzzy import fuzz, process
import shutil
import os
import subprocess as sp

# NOTE(review): this top-level block appears to be the pre-revision side of
# the diff; the same steps reappear under the ``__main__`` guard further down.
# Confirm which copy should remain before running the file.

# Obtain the raw contributor summary: reuse a cached text file when present,
# otherwise run the ``git-line-summary`` executable.
if os.path.exists('line-contributions.txt'):
    with open('line-contributions.txt', 'rt') as fp:
        lines = fp.readlines()
else:
    if shutil.which('git-line-summary'):
        print("Running git-line-summary on nipype repo")
        lines = sp.check_output(['git-line-summary']).decode().split('\n')
    else:
        raise RuntimeError("Install Git Extras to view git contributors")

# Keep only lines containing '%', dropping the first and last whitespace-
# separated tokens of each; what remains is treated as the contributor name.
# (presumably "<count> <name> <percent>" rows -- verify against the tool output)
data = [' '.join(line.strip().split()[1:-1]) for line in lines if '%' in line]

# load zenodo from master
with open('.zenodo.json', 'rt') as fp:
    zenodo = json.load(fp)

# Turn "Last, First" into "First Last" so it can be compared with ``data``.
zen_names = [' '.join(val['name'].split(',')[::-1]).strip()
             for val in zenodo['creators']]

name_matches = []

for ele in data:
    matches = process.extract(ele, zen_names, scorer=fuzz.token_sort_ratio,
                              limit=2)
    # matches is a list [('First match', % Match), ('Second match', % Match)]
    # An empty creators list yields no matches at all; treat that the same
    # as a low-score match instead of failing on matches[0].
    if matches and matches[0][1] > 80:
        val = zenodo['creators'][zen_names.index(matches[0][0])]
    else:
        # skip unmatched names
        print("No entry to sort:", ele)
        continue

    # Several contributor spellings may map to the same creator entry;
    # keep the first occurrence only.
    if val not in name_matches:
        name_matches.append(val)

CREATORS_LAST_ORCID = '0000-0002-5312-6729' # This ORCID should go last
# for entries not found in line-contributions
missing_entries = [
MISSING_ENTRIES = [
{"name": "Varada, Jan"},
{"name": "Schwabacher, Isaac"},
{"affiliation": "Child Mind Institute / Nathan Kline Institute",
Expand All @@ -61,31 +30,65 @@
{"name": "Lai, Jeff"}
]

for entry in missing_entries:
name_matches.append(entry)


def fix_position(creators, last_orcid=None):
    """Place Satra last.

    Move the creator entry whose ``'orcid'`` value equals *last_orcid* to the
    end of *creators*. When several entries carry that ORCID, the last one
    found is the one that gets moved.

    Parameters
    ----------
    creators : list of dict
        Creator records; entries without an ``'orcid'`` key are never
        selected.
    last_orcid : str, optional
        ORCID of the entry that must go last. Defaults to the module-level
        ``CREATORS_LAST_ORCID``.

    Returns
    -------
    list of dict
        The same list object, reordered in place.

    Raises
    ------
    AttributeError
        If no entry carries the requested ORCID.
    """
    if last_orcid is None:
        last_orcid = CREATORS_LAST_ORCID

    # position last author
    l_authr = None
    for info in creators:
        if info.get('orcid') == last_orcid:
            l_authr = info

    if l_authr is None:
        raise AttributeError('Missing important people')

    # Work with the entry itself rather than a saved index, so the removal
    # cannot be thrown off by earlier list mutations.
    creators.remove(l_authr)
    creators.append(l_authr)
    return creators


if __name__ == '__main__':
    # Obtain the raw contributor summary: reuse the cached text file when it
    # exists and is non-empty, otherwise run ``git-line-summary`` and cache
    # its output for later runs.
    contrib_file = Path('line-contributors.txt')
    lines = []
    if contrib_file.exists():
        print('WARNING: Reusing existing line-contributors.txt file.', file=sys.stderr)
        lines = contrib_file.read_text().splitlines()

    if not lines and shutil.which('git-line-summary'):
        print("Running git-line-summary on nipype repo")
        lines = sp.check_output(['git-line-summary']).decode().splitlines()
        contrib_file.write_text('\n'.join(lines))

    if not lines:
        raise RuntimeError('Could not find line-contributors from git repository '
                           '(hint: please install git-extras).')

    # Keep only lines containing '%', dropping the first and last whitespace-
    # separated tokens of each; what remains is treated as the contributor
    # name. (presumably "<count> <name> <percent>" rows -- verify against the
    # tool output)
    data = [' '.join(line.strip().split()[1:-1]) for line in lines if '%' in line]

    # load zenodo from master
    zenodo_file = Path('.zenodo.json')
    zenodo = json.loads(zenodo_file.read_text())

    # Turn "Last, First" into "First Last" so it can be compared with ``data``.
    zen_names = [' '.join(val['name'].split(',')[::-1]).strip()
                 for val in zenodo['creators']]

    name_matches = []
    for ele in data:
        matches = process.extract(ele, zen_names, scorer=fuzz.token_sort_ratio,
                                  limit=2)
        # matches is a list [('First match', % Match), ('Second match', % Match)]
        # An empty creators list yields no matches at all; treat that the
        # same as a low-score match instead of failing on matches[0].
        if matches and matches[0][1] > 80:
            val = zenodo['creators'][zen_names.index(matches[0][0])]
        else:
            # skip unmatched names
            print("No entry to sort:", ele)
            continue

        # Several contributor spellings may map to the same creator entry;
        # keep the first occurrence only.
        if val not in name_matches:
            name_matches.append(val)

    # Entries that never show up in the contributor summary are appended
    # after the matched ones.
    name_matches.extend(MISSING_ENTRIES)

    zenodo['creators'] = fix_position(name_matches)
    # Single write at the very end, once the creators list is final; the
    # trailing newline keeps the JSON file newline-terminated.
    zenodo_file.write_text('%s\n' % json.dumps(zenodo, indent=2, sort_keys=True))