-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #95 from Data4Democracy/data_standards
Data standards
- Loading branch information
Showing
6 changed files
with
535 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
[ | ||
{ | ||
"id": 67658, | ||
"dateCreated": "2016-02-12T21:22:30.000Z", | ||
"status": "Unassigned", | ||
"tags": ["poorSignage", "driversIgnoreSignage"], | ||
"location": { | ||
"latitude": 42.341146333981548, | ||
"longitude": -71.07689857738815 | ||
}, | ||
"address": "685 Tremont Street, Boston", | ||
"summary": "Drivers do not stop for pedestrians in the designated crosswalks" | ||
}, | ||
{ | ||
"id": 17674, | ||
"dateCreated": "2016-01-22T17:01:16.000Z", | ||
"status": "Unassigned", | ||
"tags": ["driversIgnoreSignage"], | ||
"location": { | ||
"latitude": 42.346418898652274, | ||
"longitude": -71.105465175899695 | ||
}, | ||
"address": "901 Beacon Street, Boston", | ||
"summary": "Drivers in the Left turn only lane will go straight and cause accidents with drivers in the adjacent lane who also go straight in the same lane" | ||
}, | ||
{ | ||
"id": 17452, | ||
"dateCreated": "2016-01-22T21:20:57.000Z", | ||
"status": "Unassigned", | ||
"tags": ["driversIgnoreSignage", "misuseOfBusLane"], | ||
"location": { | ||
"latitude": 42.352435140312387, | ||
"longitude": -71.061978521716284 | ||
}, | ||
"address": "20 Essex Street, Boston", | ||
"summary": "Too many drivers are using the bus lane." | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{ | ||
"$schema": "http://json-schema.org/draft-06/schema#", | ||
"title": "Concerns", | ||
"description": "Defines the structure of a set of concerns", | ||
"type": "array", | ||
"items": { | ||
"title": "Concern", | ||
"description": "Defines the structure of a concern", | ||
"type": "object", | ||
"properties": { | ||
"source": { | ||
"description": "Source of concern", | ||
"type": "string", | ||
"enum": ["seeclickfix", "visionzero"] | ||
}, | ||
"id": { | ||
"description": "Unique identifier of concern", | ||
"type": "number" | ||
}, | ||
"dateCreated": { | ||
"description": "Date concern was created, ISO8601 formatted", | ||
"type": "string", | ||
"format": "date-time" | ||
}, | ||
"dateResolved": { | ||
"description": "Date concern was resolved, ISO8601 formatted", | ||
"type": "string", | ||
"format": "date-time" | ||
}, | ||
"status": { | ||
"description": "Status of concern", | ||
"type": "string" | ||
}, | ||
"category": { | ||
"description": "Primary category of concern", | ||
"type": "string" | ||
}, | ||
"subCategories": { | ||
"description": "Subcategories of concern", | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
}, | ||
"uniqueItems": true | ||
}, | ||
"location": { | ||
"description": "Coordinates of concern, WGS84 formatted", | ||
"type": "object", | ||
"properties": { | ||
"latitude": { | ||
"description": "Latitude of concern", | ||
"type": "number" | ||
}, | ||
"longitude": { | ||
"description": "Longitude of concern", | ||
"type": "number" | ||
} | ||
} | ||
}, | ||
"address": { | ||
"description": "Address of concern", | ||
"type": "string" | ||
}, | ||
"summary": { | ||
"description": "Summary of concern", | ||
"type": "string" | ||
} | ||
}, | ||
"required": ["id", "source", "dateCreated", "status", "category", "location"] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
[ | ||
{ | ||
"id": 1403832, | ||
"dateOccurred": "2016-01-01T00:56:45-05:00", | ||
"vehicles": [ | ||
{ "category": "car" } | ||
], | ||
"location": { | ||
"latitude": 42.300864811284519, | ||
"longitude": -71.071316786983303 | ||
}, | ||
"address": "14 Corona Street", | ||
"summary": "REPORTED INJURIES (P) (E) (F)" | ||
}, | ||
{ | ||
"id": 1410434, | ||
"dateOccurred": "2016-01-04T15:11:11-05:00", | ||
"type": "pedestrian", | ||
"persons": [ | ||
{ "category": "pedestrian", "quantity": 1 } | ||
], | ||
"location": { | ||
"latitude": 42.332547160943271, | ||
"longitude": -71.072124196868316 | ||
}, | ||
"summary": "PEDESTRIAN STRUCK (P) (E) (F)" | ||
}, | ||
{ | ||
"id": 1410816, | ||
"dateOccurred": "2016-01-04T18:42:24-05:00", | ||
"vehicles": [ | ||
{ "category": "car" } | ||
], | ||
"location": { | ||
"latitude": 42.311376108568268, | ||
"longitude": -71.081614220307372 | ||
}, | ||
"address": "67 Brunswick Street, Boston", | ||
"summary": "UNKNOWN IF INJURIES - ADVISE NEED FOR EMS (P) (E) (F)" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
{ | ||
"$schema": "http://json-schema.org/draft-06/schema#", | ||
"title": "Crashes", | ||
"description": "Defines the structure of a set of crashes", | ||
"type": "array", | ||
"items": { | ||
"title": "Crash", | ||
"description": "Defines the structure of a crash", | ||
"type": "object", | ||
"properties": { | ||
"id": { | ||
"description": "Unique identifier of crash", | ||
"type": "number" | ||
}, | ||
"dateOccurred": { | ||
"description": "Date crash occurred, ISO8601 formatted", | ||
"type": "string", | ||
"format": "date-time" | ||
}, | ||
"location": { | ||
"description": "Coordinates of crash, WGS84 formatted", | ||
"type": "object", | ||
"properties": { | ||
"latitude": { | ||
"description": "Latitude of crash", | ||
"type": "number" | ||
}, | ||
"longitude": { | ||
"description": "Longitude of crash", | ||
"type": "number" | ||
} | ||
} | ||
}, | ||
"vehicles": { | ||
"description": "Vehicles involved in crash", | ||
"type": "array", | ||
"items": { | ||
"title": "Vehicle", | ||
"description": "Defines the structure of a vehicle", | ||
"type": "object", | ||
"properties": { | ||
"category": { | ||
"description": "Category of vehicle", | ||
"type": "string", | ||
"enum": ["car", "bike", "taxi", "bus", "truck"] | ||
}, | ||
"quantity": { | ||
"description": "Quantity of vehicles of this category", | ||
"type": "number" | ||
} | ||
}, | ||
"required": ["category"] | ||
}, | ||
"uniqueItems": true | ||
}, | ||
"persons": { | ||
"description": "Persons involved in crash", | ||
"type": "array", | ||
"items": { | ||
"title": "Person", | ||
"description": "Defines the structure of a person", | ||
"type": "object", | ||
"properties": { | ||
"category": { | ||
"description": "Category of person", | ||
"type": "string", | ||
"enum": ["driver", "pedestrian", "cyclist", "other"] | ||
}, | ||
"quantity": { | ||
"description": "Quantity of persons", | ||
"type": "number" | ||
}, | ||
"injuryType": { | ||
"description": "Type of injury", | ||
"type": "string", | ||
"enum": ["minor", "major", "fatal", "unknown"] | ||
} | ||
}, | ||
"required": ["category"] | ||
}, | ||
"uniqueItems": true | ||
}, | ||
"address": { | ||
"description": "Address of crash", | ||
"type": "string" | ||
}, | ||
"summary": { | ||
"description": "Summary of crash", | ||
"type": "string" | ||
} | ||
}, | ||
"required": ["id", "dateOccurred", "location"] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
# Transform a concerns CSV into compatible JSON document. | ||
# Author terryf82 https://github.com/terryf82 | ||
|
||
import argparse | ||
import dateutil.parser as date_parser | ||
import json | ||
import os | ||
import pandas as pd | ||
from collections import OrderedDict | ||
|
||
# Command-line interface: the destination selects which city's column layout
# is used, the folder is the city's data directory (expects a "raw" subfolder).
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--destination", type=str,
                    help="destination name")
# Every path below is derived from the folder, so a missing value is reported
# by argparse as a usage error instead of crashing inside os.path.join.
parser.add_argument("-f", "--folder", type=str, required=True,
                    help="absolute path to destination folder")

args = parser.parse_args()

raw_path = os.path.join(args.folder, "raw")
if not os.path.exists(raw_path):
    print(raw_path+" not found, exiting")
    raise SystemExit(1)

# Boston file name -> value written to each concern's "source" field.
boston_sources = {
    "Vision_Zero_Entry.csv": "visionzero",
    "bos_scf.csv": "seeclickfix",
}

valid_concerns = []
# Counter used as the id of Boston SeeClickFix concerns, which are not given
# an id column here. It advances for every Boston concern that is appended.
manual_concern_id = 1

print("searching "+raw_path+" for raw concerns file(s)")

for csv_file in os.listdir(raw_path):
    print(csv_file)

    # Boston only knows how to map its two named files. Anything else is
    # skipped up front; previously such a file either raised a NameError or
    # re-appended the last concern built from an earlier file.
    if args.destination == "boston" and csv_file not in boston_sources:
        print("skipping unrecognised file "+csv_file)
        continue

    # na_filter=False keeps empty cells as "" so the emptiness checks below work.
    df_concerns = pd.read_csv(os.path.join(raw_path, csv_file), na_filter=False)
    rows = df_concerns.to_dict("records")

    for row in rows:
        if args.destination == "boston":
            # Boston presently has concerns from two sources - VisionZero and SeeClickFix
            source = boston_sources[csv_file]

            if source == "visionzero":
                # skip concerns that don't have a date or request type
                if row["REQUESTDATE"] == "" or row["REQUESTTYPE"] == "":
                    continue

                valid_concern = OrderedDict([
                    ("id", row["OBJECTID"]),
                    ("source", source),
                    ("dateCreated", row["REQUESTDATE"]),
                    ("status", row["STATUS"]),
                    ("category", row["REQUESTTYPE"]),
                    ("location", OrderedDict([
                        ("latitude", row["Y"]),
                        ("longitude", row["X"])
                    ]))
                ])

                # only add summary property if data exists
                if row["COMMENTS"] != "":
                    valid_concern["summary"] = row["COMMENTS"]

            else:
                # skip concerns that don't have a date or summary
                if row["created"] == "" or row["summary"] == "":
                    continue

                valid_concern = OrderedDict([
                    ("id", manual_concern_id),
                    ("source", source),
                    ("dateCreated", row["created"]),
                    ("status", "unknown"),
                    ("category", row["summary"]),
                    ("location", OrderedDict([
                        ("latitude", row["Y"]),
                        ("longitude", row["X"])
                    ]))
                ])

                # only add summary property if data exists
                if row["description"] != "":
                    valid_concern["summary"] = row["description"]

            valid_concerns.append(valid_concern)
            manual_concern_id += 1

        elif args.destination == "dc":
            # skip concerns that don't have a date or request type
            if row["REQUESTDATE"] == "" or row["REQUESTTYPE"] == "":
                continue

            # NOTE(review): no "source" is written for dc concerns - confirm
            # whether downstream validation requires one.
            valid_concern = OrderedDict([
                ("id", row["OBJECTID"]),
                ("dateCreated", row["REQUESTDATE"]),
                ("status", row["STATUS"]),
                ("category", row["REQUESTTYPE"]),
                ("location", OrderedDict([
                    ("latitude", row["Y"]),
                    ("longitude", row["X"])
                ]))
            ])

            # only add summary property if data exists
            if row["COMMENTS"] != "":
                valid_concern["summary"] = row["COMMENTS"]

            valid_concerns.append(valid_concern)

        elif args.destination == "cambridge":
            # skip concerns that don't have a date or issue type
            if row["ticket_created_date_time"] == "" or row["issue_type"] == "":
                continue

            # isoformat() yields the "T"-separated ISO 8601 form; str() on a
            # datetime uses a space separator instead.
            # NOTE(review): the fixed "-05:00" offset ignores daylight saving
            # and assumes the parsed value carries no offset - confirm.
            created = date_parser.parse(row["ticket_created_date_time"])

            valid_concern = OrderedDict([
                ("id", row["ticket_id"]),
                ("dateCreated", created.isoformat()+"-05:00"),
                ("status", row["ticket_status"]),
                ("category", row["issue_type"]),
                ("location", OrderedDict([
                    ("latitude", row["lat"]),
                    ("longitude", row["lng"])
                ]))
            ])

            # only add summary property if data exists
            if row["issue_description"] != "":
                valid_concern["summary"] = row["issue_description"]

            valid_concerns.append(valid_concern)

print("done, {} valid concerns loaded".format(len(valid_concerns)))

concerns_output = os.path.join(args.folder, "transformed/concerns.json")

# Create the output directory on first run rather than failing in open().
output_dir = os.path.dirname(concerns_output)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

with open(concerns_output, "w") as f:
    json.dump(valid_concerns, f)

print("output written to {}".format(concerns_output))
Oops, something went wrong.