-
Notifications
You must be signed in to change notification settings - Fork 2
/
export_to_ckan.py
94 lines (82 loc) · 3.33 KB
/
export_to_ckan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import psycopg2.extras
import psycopg2
import urllib2
import urllib
import json
import pprint
import uuid
import os
import metautils
from dbsettings import settings
# Pass the DB settings to metautils (presumably needed before its helpers
# are used -- confirm against metautils).
metautils.setsettings(settings)

# This is a one-time operation that creates CKAN data from the ODM DB.
# Later updates should be done using harvesters.
url = os.environ['CKANURL']
apikey = os.environ['CKANAPIKEY']

dict_cur = metautils.getDBCursor(settings, dictCursor=True)

# Take all accepted data, but nothing from catalogs except for
# Wuppertal/Wennigsen. NOTE(review): 'd' looks like the source code that
# marks catalog entries -- confirm.
dict_cur.execute(
    "SELECT * FROM data WHERE accepted = %s "
    "AND (source NOT LIKE %s OR city LIKE %s OR city LIKE %s)",
    (True, 'd', 'wennigsen', 'wuppertal'),
)
def category_to_group(groupname):
    """Return the CKAN group reference dict for a category title.

    The group is referenced by ``name``, which is the title passed through
    ``metautils.force_alphanumeric_short``.
    """
    # maybe some other id
    short_name = metautils.force_alphanumeric_short(groupname)
    return {'name': short_name}
def openmap(open):
    """Map the tri-state ``open`` flag to its German display label.

    None  -> 'Unbekannt'   (unknown)
    truthy -> 'Offen'       (open)
    falsy  -> 'Nicht offen' (not open)
    """
    # None must be checked first: it is falsy but means "unknown".
    if open is None:
        return 'Unbekannt'
    return 'Offen' if open else 'Nicht offen'
def dBtoCKAN(rec):
    """Convert one row of the ``data`` table into a CKAN package dict.

    ``rec`` is a dict-like row (this script passes rows from a dict cursor).
    Keys read: city, accepted, url, title, description, temporalextent,
    source, originating_portal, metadata, metadata_xml, open, licenseshort,
    publisher, categories and filelist.

    Returns a dict meant to be sent to CKAN's ``package_create`` action.
    Optional fields are only emitted when the column is not None. Resource
    URLs are de-duplicated while keeping their original order.

    Raises KeyError if a column is missing from ``rec`` or if
    ``rec['source']`` is not a key of ``metautils.convert_source_dict``.
    """
    d = {}
    d['owner_org'] = rec['city']
    d['state'] = 'active' if rec['accepted'] else 'deleted'
    d['url'] = rec['url']
    d['title'] = rec['title']
    d['name'] = str(uuid.uuid4())  # Must be unique, our titles are not
    if rec['description'] is not None:
        d['notes'] = rec['description']

    # Extras are appended in a fixed order; optional ones are skipped
    # when the column is NULL.
    extras = []
    if rec['temporalextent'] is not None:
        extras.append({'key': 'temporalextent',
                       'value': rec['temporalextent']})
    extras.append({'key': 'metadata_source_type',
                   'value': metautils.convert_source_dict[rec['source']]})
    if rec['originating_portal'] is not None:
        extras.append({'key': 'metadata_source_portal',
                       'value': rec['originating_portal']})
    if rec['metadata'] is not None:
        extras.append({'key': 'original_metadata_json',
                       'value': rec['metadata']})
    if rec['metadata_xml'] is not None:
        extras.append({'key': 'original_metadata_xml',
                       'value': rec['metadata_xml']})
    extras.append({'key': 'openstatus', 'value': openmap(rec['open'])})
    d['extras'] = extras

    if rec['licenseshort'] is not None:
        d['license_id'] = rec['licenseshort']
    if rec['open'] is not None:
        # Note that None gets mapped to False in CKAN
        d['isopen'] = rec['open']
    if rec['publisher'] is not None:
        d['maintainer'] = rec['publisher']

    # N.B. groups have to be created before they can be assigned.
    # Groups are dictionaries. We use them via title, which is what we store.
    # A NULL categories column is treated as "no groups". The list
    # comprehension guarantees a real list, which json.dumps needs.
    d['groups'] = [category_to_group(category)
                   for category in (rec['categories'] or [])]

    # Duplicates in resources are not allowed. Actually we shouldn't allow
    # them either... De-duplicate while preserving the stored order so the
    # resulting package is deterministic (plain set iteration is not).
    seen_urls = set()
    resources = []
    for file_url in (rec['filelist'] or []):
        if file_url in seen_urls:
            continue
        seen_urls.add(file_url)
        resources.append({'url': file_url})
    d['resources'] = resources
    return d
def ckanCreate(url, apikey, rec):
    """Create one CKAN dataset from a database record.

    Converts ``rec`` with ``dBtoCKAN`` and POSTs the URL-quoted JSON to
    ``<url>/api/action/package_create`` with ``apikey`` in the
    ``Authorization`` header.

    This is a best-effort, interactive import: if the request fails, the
    reason and the offending dataset are printed and the script waits for
    Enter before returning, so a single bad record does not abort the run.
    The response body is not inspected. Returns None.
    """
    dataset_dict = dBtoCKAN(rec)
    data_string = urllib.quote(json.dumps(dataset_dict))
    request = urllib2.Request(url + '/api/action/package_create')
    request.add_header('Authorization', apikey)
    try:
        response = urllib2.urlopen(request, data_string)
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # Ctrl-C (KeyboardInterrupt) and SystemExit still stop the script.
        print('ERROR Failed to create:')
        print('Reason: %s' % (err,))
        pprint.pprint(dataset_dict)
        raw_input("Press Enter to continue...")
    else:
        # Release the connection; one is opened per record.
        response.close()
# Push every selected row to CKAN, one package_create call per record.
records = dict_cur.fetchall()
for rec in records:
    ckanCreate(url, apikey, rec)