-
Notifications
You must be signed in to change notification settings - Fork 1
/
load-csv-map2.py
59 lines (49 loc) · 1.59 KB
/
load-csv-map2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import sys
import csv
import json
import time
from dotenv import load_dotenv
from pipeline.config import Config
# Load CSV files into LMDB via the index loader of the merged config.
# Usage: load-csv-map2.py --same|--different <path/to/file.csv>
#    or: load-csv-map2.py --all [--clear]
# Bootstrap: read environment settings, build the pipeline configuration and
# fetch the index loader that the rest of this script uses.
# python-dotenv: pick up variables from a .env file, if one is present.
load_dotenv()
# Base path handed to Config; an empty string is passed when LUX_BASEPATH is unset.
basepath = os.getenv('LUX_BASEPATH', "")
cfgs = Config(basepath=basepath)
# NOTE(review): idmap is not referenced again in the visible script; get_idmap()
# may have side effects that the following calls rely on -- confirm before removing.
idmap = cfgs.get_idmap()
cfgs.cache_globals()
# NOTE(review): presumably this builds the per-config components, including the
# index loader read below -- confirm against pipeline.config.
cfgs.instantiate_all()
# Index loader stored under the 'merged' entry of cfgs.results; every CSV file
# is loaded through it.
loader = cfgs.results['merged']['indexLoader']
# Dispatch on the command-line flags:
#   --all [--clear]              load every *.csv found in the sameAs and
#                                differentFrom directories under cfgs.data_dir
#   --same | --different <csv>   load one CSV file into the matching map
# Usage errors are reported on stderr and end the script with a non-zero exit
# status so that calling shells and scripts can detect the failure.
if '--all' in sys.argv:
    # Process based on directories: (directory, target map, label used in the log line).
    sources = (
        (os.path.join(cfgs.data_dir, "sameAs"), "equivs", "sameAs map"),
        (os.path.join(cfgs.data_dir, "differentFrom"), "diffs", "diff map"),
    )
    if '--clear' in sys.argv:
        # Clear both maps before loading, as requested on the command line.
        loader.clear('equivs')
        loader.clear('diffs')
    for csv_dir, which, label in sources:
        # os.listdir returns names in arbitrary order; sort so that repeated
        # runs load the files in the same sequence.
        for csvfn in sorted(os.listdir(csv_dir)):
            if not csvfn.endswith('.csv'):
                continue
            print(f" -- Adding {csvfn} to {label}")
            loader.load(os.path.join(csv_dir, csvfn), which)
else:
    if '--same' in sys.argv:
        which = "equivs"
    elif '--different' in sys.argv:
        which = "diffs"
    else:
        print(
            "You must give --all, or either --same or --different when loading maps",
            file=sys.stderr,
        )
        sys.exit(1)
    # The CSV path is taken from the last command-line argument.
    csvfn = sys.argv[-1]
    if not os.path.exists(csvfn):
        print(f"That file ({csvfn}) does not exist", file=sys.stderr)
        sys.exit(1)
    loader.load(csvfn, which)
print("Done")