diff --git a/data_standards/concerns-instance.json b/data_standards/concerns-instance.json
new file mode 100644
index 00000000..f83d4193
--- /dev/null
+++ b/data_standards/concerns-instance.json
@@ -0,0 +1,43 @@
+[
+  {
+    "id": 67658,
+    "source": "visionzero",
+    "dateCreated": "2016-02-12T21:22:30.000Z",
+    "status": "Unassigned",
+    "category": "poorSignage",
+    "subCategories": ["driversIgnoreSignage"],
+    "location": {
+      "latitude": 42.341146333981548,
+      "longitude": -71.07689857738815
+    },
+    "address": "685 Tremont Street, Boston",
+    "summary": "Drivers do not stop for pedestrians in the designated crosswalks"
+  },
+  {
+    "id": 17674,
+    "source": "visionzero",
+    "dateCreated": "2016-01-22T17:01:16.000Z",
+    "status": "Unassigned",
+    "category": "driversIgnoreSignage",
+    "location": {
+      "latitude": 42.346418898652274,
+      "longitude": -71.105465175899695
+    },
+    "address": "901 Beacon Street, Boston",
+    "summary": "Drivers in the Left turn only lane will go straight and cause accidents with drivers in the adjacent lane who also go straight in the same lane"
+  },
+  {
+    "id": 17452,
+    "source": "visionzero",
+    "dateCreated": "2016-01-22T21:20:57.000Z",
+    "status": "Unassigned",
+    "category": "driversIgnoreSignage",
+    "subCategories": ["misuseOfBusLane"],
+    "location": {
+      "latitude": 42.352435140312387,
+      "longitude": -71.061978521716284
+    },
+    "address": "20 Essex Street, Boston",
+    "summary": "Too many drivers are using the bus lane."
+  }
+]
diff --git a/data_standards/concerns-schema.json b/data_standards/concerns-schema.json
new file mode 100644
index 00000000..a8e2c71a
--- /dev/null
+++ b/data_standards/concerns-schema.json
@@ -0,0 +1,76 @@
+{
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "title": "Concerns",
+  "description": "Defines the structure of a set of concerns",
+  "type": "array",
+  "items": {
+    "title": "Concern",
+    "description": "Defines the structure of a concern",
+    "type": "object",
+    "properties": {
+      "source": {
+        "description": "Source of concern",
+        "type": "string",
+        "enum": ["seeclickfix", "visionzero"]
+      },
+      "id": {
+        "description": "Unique identifier of concern",
+        "type": "number"
+      },
+      "dateCreated": {
+        "description": "Date concern was created, ISO8601 formatted",
+        "type": "string",
+        "format": "date-time"
+      },
+      "dateResolved": {
+        "description": "Date concern was resolved, ISO8601 formatted",
+        "type": "string",
+        "format": "date-time"
+      },
+      "status": {
+        "description": "Status of concern",
+        "type": "string"
+      },
+      "category": {
+        "description": "Primary category of concern",
+        "type": "string"
+      },
+      "subCategories": {
+        "description": "Subcategories of concern",
+        "type": "array",
+        "items": {
+          "type": "string"
+        },
+        "uniqueItems": true
+      },
+      "location": {
+        "description": "Coordinates of concern, WGS84 formatted",
+        "type": "object",
+        "properties": {
+          "latitude": {
+            "description": "Latitude of concern",
+            "type": "number",
+            "minimum": -90,
+            "maximum": 90
+          },
+          "longitude": {
+            "description": "Longitude of concern",
+            "type": "number",
+            "minimum": -180,
+            "maximum": 180
+          }
+        },
+        "required": ["latitude", "longitude"]
+      },
+      "address": {
+        "description": "Address of concern",
+        "type": "string"
+      },
+      "summary": {
+        "description": "Summary of concern",
+        "type": "string"
+      }
+    },
+    "required": ["id", "source", "dateCreated", "status", "category", "location"]
+  }
+}
diff --git a/data_standards/crashes-instance.json b/data_standards/crashes-instance.json
new file mode 100644
index 00000000..7728757f
--- /dev/null
+++ b/data_standards/crashes-instance.json
@@ -0,0 +1,41 @@
+[
+  {
+    "id": 1403832,
+    "dateOccurred": "2016-01-01T00:56:45-05:00",
+    "vehicles": [
+      { "category": "car" }
+    ],
+    "location": {
+      "latitude": 42.300864811284519,
+      "longitude": -71.071316786983303
+    },
+    "address": "14 Corona Street",
+    "summary": "REPORTED INJURIES (P) (E) (F)"
+  },
+  {
+    "id": 1410434,
+    "dateOccurred": "2016-01-04T15:11:11-05:00",
+    "type": "pedestrian",
+    "persons": [
+      { "category": "pedestrian", "quantity": 1 }
+    ],
+    "location": {
+      "latitude": 42.332547160943271,
+      "longitude": -71.072124196868316
+    },
+    "summary": "PEDESTRIAN STRUCK (P) (E) (F)"
+  },
+  {
+    "id": 1410816,
+    "dateOccurred": "2016-01-04T18:42:24-05:00",
+    "vehicles": [
+      { "category": "car" }
+    ],
+    "location": {
+      "latitude": 42.311376108568268,
+      "longitude": -71.081614220307372
+    },
+    "address": "67 Brunswick Street, Boston",
+    "summary": "UNKNOWN IF INJURIES - ADVISE NEED FOR EMS (P) (E) (F)"
+  }
+]
diff --git a/data_standards/crashes-schema.json b/data_standards/crashes-schema.json
new file mode 100644
index 00000000..25f70543
--- /dev/null
+++ b/data_standards/crashes-schema.json
@@ -0,0 +1,101 @@
+{
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "title": "Crashes",
+  "description": "Defines the structure of a set of crashes",
+  "type": "array",
+  "items": {
+    "title": "Crash",
+    "description": "Defines the structure of a crash",
+    "type": "object",
+    "properties": {
+      "id": {
+        "description": "Unique identifier of crash",
+        "type": "number"
+      },
+      "dateOccurred": {
+        "description": "Date crash occurred, ISO8601 formatted",
+        "type": "string",
+        "format": "date-time"
+      },
+      "location": {
+        "description": "Coordinates of crash, WGS84 formatted",
+        "type": "object",
+        "properties": {
+          "latitude": {
+            "description": "Latitude of crash",
+            "type": "number",
+            "minimum": -90,
+            "maximum": 90
+          },
+          "longitude": {
+            "description": "Longitude of crash",
+            "type": "number",
+            "minimum": -180,
+            "maximum": 180
+          }
+        },
+        "required": ["latitude", "longitude"]
+      },
+      "vehicles": {
+        "description": "Vehicles involved in crash",
+        "type": "array",
+        "items": {
+          "title": "Vehicle",
+          "description": "Defines the structure of a vehicle",
+          "type": "object",
+          "properties": {
+            "category": {
+              "description": "Category of vehicle",
+              "type": "string",
+              "enum": ["car", "bike", "taxi", "bus", "truck"]
+            },
+            "quantity": {
+              "description": "Quantity of vehicles of this category",
+              "type": "integer",
+              "minimum": 1
+            }
+          },
+          "required": ["category"]
+        },
+        "uniqueItems": true
+      },
+      "persons": {
+        "description": "Persons involved in crash",
+        "type": "array",
+        "items": {
+          "title": "Person",
+          "description": "Defines the structure of a person",
+          "type": "object",
+          "properties": {
+            "category": {
+              "description": "Category of person",
+              "type": "string",
+              "enum": ["driver", "pedestrian", "cyclist", "other"]
+            },
+            "quantity": {
+              "description": "Quantity of persons",
+              "type": "integer",
+              "minimum": 1
+            },
+            "injuryType": {
+              "description": "Type of injury",
+              "type": "string",
+              "enum": ["minor", "major", "fatal", "unknown"]
+            }
+          },
+          "required": ["category"]
+        },
+        "uniqueItems": true
+      },
+      "address": {
+        "description": "Address of crash",
+        "type": "string"
+      },
+      "summary": {
+        "description": "Summary of crash",
+        "type": "string"
+      }
+    },
+    "required": ["id", "dateOccurred", "location"]
+  }
+}
diff --git a/src/data_transformation/transform_concerns.py b/src/data_transformation/transform_concerns.py
new file mode 100644
index 00000000..86691dea
--- /dev/null
+++ b/src/data_transformation/transform_concerns.py
@@ -0,0 +1,164 @@
+# Transform a concerns CSV into compatible JSON document.
+# Author terryf82 https://github.com/terryf82
+
+import argparse
+import dateutil.parser as date_parser
+import json
+import os
+import pandas as pd
+import sys
+from collections import OrderedDict
+
+parser = argparse.ArgumentParser()
+# both arguments are required: every path and branch below depends on them
+parser.add_argument("-d", "--destination", type=str, required=True,
+                    help="destination name")
+parser.add_argument("-f", "--folder", type=str, required=True,
+                    help="absolute path to destination folder")
+
+args = parser.parse_args()
+
+raw_path = os.path.join(args.folder, "raw")
+if not os.path.exists(raw_path):
+    print(raw_path+" not found, exiting")
+    sys.exit(1)
+
+valid_concerns = []
+# sequential id for sources that don't supply an identifier of their own
+manual_concern_id = 1
+
+print("searching "+raw_path+" for raw concerns file(s)")
+
+for csv_file in os.listdir(raw_path):
+    print(csv_file)
+
+    # na_filter=False leaves empty cells as "" so they can be tested directly
+    df_concerns = pd.read_csv(os.path.join(raw_path, csv_file), na_filter=False)
+    dict_concerns = df_concerns.to_dict("records")
+
+    for key in dict_concerns:
+        if args.destination == "boston":
+            # Boston presently has concerns from two sources - VisionZero and SeeClickFix
+            if csv_file == "Vision_Zero_Entry.csv":
+                # skip concerns that don't have a date, request type, X and Y
+                if (key["REQUESTDATE"] == "" or key["REQUESTTYPE"] == ""
+                        or key["X"] == "" or key["Y"] == ""):
+                    continue
+
+                valid_concern = OrderedDict([
+                    ("id", key["OBJECTID"]),
+                    ("source", "visionzero"),
+                    ("dateCreated", key["REQUESTDATE"]),
+                    ("status", key["STATUS"]),
+                    ("category", key["REQUESTTYPE"]),
+                    ("location", OrderedDict([
+                        ("latitude", float(key["Y"])),
+                        ("longitude", float(key["X"]))
+                    ]))
+                ])
+
+                # only add summary property if data exists
+                if key["COMMENTS"] != "":
+                    valid_concern["summary"] = key["COMMENTS"]
+
+            elif csv_file == "bos_scf.csv":
+                # skip concerns that don't have a date, summary, X and Y
+                if (key["created"] == "" or key["summary"] == ""
+                        or key["X"] == "" or key["Y"] == ""):
+                    continue
+
+                valid_concern = OrderedDict([
+                    ("id", manual_concern_id),
+                    ("source", "seeclickfix"),
+                    ("dateCreated", key["created"]),
+                    ("status", "unknown"),
+                    ("category", key["summary"]),
+                    ("location", OrderedDict([
+                        ("latitude", float(key["Y"])),
+                        ("longitude", float(key["X"]))
+                    ]))
+                ])
+                # only rows that consume a manual id advance the counter
+                manual_concern_id += 1
+
+                # only add summary property if data exists
+                if key["description"] != "":
+                    valid_concern["summary"] = key["description"]
+
+            else:
+                # not a known Boston concerns file: skip its remaining rows
+                # instead of appending a concern left over from another row
+                break
+
+            valid_concerns.append(valid_concern)
+
+        elif args.destination == "dc":
+            # skip concerns that don't have a date, request type, X and Y
+            if (key["REQUESTDATE"] == "" or key["REQUESTTYPE"] == ""
+                    or key["X"] == "" or key["Y"] == ""):
+                continue
+
+            valid_concern = OrderedDict([
+                ("id", key["OBJECTID"]),
+                # NOTE(review): the schema requires "source"; these columns match
+                # the Boston Vision Zero file, so visionzero is presumed - confirm
+                ("source", "visionzero"),
+                ("dateCreated", key["REQUESTDATE"]),
+                ("status", key["STATUS"]),
+                ("category", key["REQUESTTYPE"]),
+                ("location", OrderedDict([
+                    ("latitude", float(key["Y"])),
+                    ("longitude", float(key["X"]))
+                ]))
+            ])
+
+            # only add summary property if data exists
+            if key["COMMENTS"] != "":
+                valid_concern["summary"] = key["COMMENTS"]
+
+            valid_concerns.append(valid_concern)
+
+        elif args.destination == "cambridge":
+            # skip concerns that don't have a date, issue type, lat and lng
+            if (key["ticket_created_date_time"] == "" or key["issue_type"] == ""
+                    or key["lat"] == "" or key["lng"] == ""):
+                continue
+
+            # NOTE(review): the schema requires "source" but nothing here says
+            # which source Cambridge tickets come from - confirm and add it
+            valid_concern = OrderedDict([
+                ("id", key["ticket_id"]),
+                # isoformat() yields the "T" separator a date-time needs;
+                # the UTC offset is hard-coded to -05:00
+                ("dateCreated", date_parser.parse(key["ticket_created_date_time"]).isoformat()+"-05:00"),
+                ("status", key["ticket_status"]),
+                ("category", key["issue_type"]),
+                ("location", OrderedDict([
+                    ("latitude", float(key["lat"])),
+                    ("longitude", float(key["lng"]))
+                ]))
+            ])
+
+            # only add summary property if data exists
+            if key["issue_description"] != "":
+                valid_concern["summary"] = key["issue_description"]
+
+            valid_concerns.append(valid_concern)
+
+        else:
+            print("transformation of "+args.destination+" concerns not yet implemented")
+            sys.exit(1)
+
+print("done, {} valid concerns loaded".format(len(valid_concerns)))
+
+# create the output folder if needed so that open() below cannot fail on it
+transformed_path = os.path.join(args.folder, "transformed")
+if not os.path.exists(transformed_path):
+    os.makedirs(transformed_path)
+
+concerns_output = os.path.join(transformed_path, "concerns.json")
+
+with open(concerns_output, "w") as f:
+    json.dump(valid_concerns, f)
+
+print("output written to {}".format(concerns_output))
diff --git a/src/data_transformation/transform_crashes.py b/src/data_transformation/transform_crashes.py
new file mode 100644
index 00000000..eee70cbb
--- /dev/null
+++ b/src/data_transformation/transform_crashes.py
@@ -0,0 +1,172 @@
+# Transform a crashes CSV into compatible JSON document.
+# Author terryf82 https://github.com/terryf82
+
+import argparse
+import datetime
+import dateutil.parser as date_parser
+import json
+import os
+import pandas as pd
+import sys
+from collections import OrderedDict
+
+parser = argparse.ArgumentParser()
+# both arguments are required: every path and branch below depends on them
+parser.add_argument("-d", "--destination", type=str, required=True,
+                    help="destination name")
+parser.add_argument("-f", "--folder", type=str, required=True,
+                    help="absolute path to destination folder")
+
+args = parser.parse_args()
+
+raw_path = os.path.join(args.folder, "raw")
+if not os.path.exists(raw_path):
+    print(raw_path+" not found, exiting")
+    sys.exit(1)
+
+valid_crashes = []
+# sequential id for sources that don't supply an identifier of their own
+manual_crash_id = 1
+
+print("searching "+raw_path+" for raw crash file(s)")
+
+for csv_file in os.listdir(raw_path):
+    print(csv_file)
+    # na_filter=False leaves empty cells as "" so they can be tested directly
+    df_crashes = pd.read_csv(os.path.join(raw_path, csv_file), na_filter=False)
+    dict_crashes = df_crashes.to_dict("records")
+
+    for key in dict_crashes:
+        if args.destination == "boston":
+            # skip crashes that don't have X, Y and date details
+            if key["X"] == "" or key["Y"] == "" or key["CALENDAR_DATE"] == "":
+                continue
+
+            # reset per row so a file with neither time column cannot reuse
+            # the date and time of a previous row
+            formatted_date = None
+            formatted_time = None
+
+            # 2015 and 2017 files
+            # date requires no modification
+            # time exists as seconds since midnight
+            if "TIME," in key:
+                if key["TIME,"] != "":
+                    formatted_date = key["CALENDAR_DATE"]
+                    m, s = divmod(int(key["TIME,"]), 60)
+                    h, m = divmod(m, 60)
+                    formatted_time = "%02d:%02d:%02d" % (h, m, s)
+                else:
+                    continue
+
+            # 2016 file
+            # date requires splitting
+            # time requires no modification
+            if "TIME" in key:
+                if key["TIME"] != "":
+                    formatted_date = key["CALENDAR_DATE"].split(" ")[0]
+                    formatted_time = key["TIME"]
+                else:
+                    continue
+
+            # skip crashes from files that have neither time column
+            if formatted_date is None:
+                continue
+
+            valid_crash = OrderedDict([
+                ("id", key["CAD_EVENT_REL_COMMON_ID"]),
+                # assume all crashes are in local time (GMT-5)
+                ("dateOccurred", formatted_date+"T"+formatted_time+"-05:00"),
+                ("location", OrderedDict([
+                    ("latitude", float(key["Y"])),
+                    ("longitude", float(key["X"]))
+                ]))
+            ])
+
+            # very basic transformation of mode_type into vehicles
+            valid_crash["vehicles"] = []
+
+            # all crashes are assumed to have involved a car
+            valid_crash["vehicles"].append({"category": "car"})
+
+            if key["mode_type"] == "bike":
+                valid_crash["vehicles"].append({"category": "bike"})
+
+            # TODO persons
+
+            if key["FIRST_EVENT_SUBTYPE"] != "":
+                valid_crash["summary"] = key["FIRST_EVENT_SUBTYPE"]
+
+            valid_crashes.append(valid_crash)
+
+        elif args.destination == "cambridge":
+            # skip crashes that don't have a date, X and Y
+            if key["Date Time"] == "" or key["X"] == "" or key["Y"] == "":
+                continue
+
+            valid_crash = OrderedDict([
+                ("id", manual_crash_id),
+                # isoformat() yields the "T" separator a date-time needs;
+                # assume all crashes are in local time (GMT-5)
+                ("dateOccurred", date_parser.parse(key["Date Time"]).isoformat()+"-05:00"),
+                ("location", OrderedDict([
+                    ("latitude", float(key["Y"])),
+                    ("longitude", float(key["X"]))
+                ]))
+            ])
+
+            # TODO persons
+
+            if key["V1 First Event"] != "":
+                valid_crash["summary"] = key["V1 First Event"]
+
+            valid_crashes.append(valid_crash)
+            manual_crash_id += 1
+
+        elif args.destination == "dc":
+            # skip crashes that don't have a date, X and Y
+            if key["REPORTDATE"] == "" or key["X"] == "" or key["Y"] == "":
+                continue
+
+            valid_crash = OrderedDict([
+                ("id", key["OBJECTID"]),
+                ("dateOccurred", key["REPORTDATE"]),
+                ("location", OrderedDict([
+                    ("latitude", float(key["Y"])),
+                    ("longitude", float(key["X"]))
+                ]))
+            ])
+
+            if key["TOTAL_VEHICLES"] != 0 or key["TOTAL_BICYCLES"] != 0:
+                valid_crash["vehicles"] = []
+
+            if key["TOTAL_VEHICLES"] != 0:
+                valid_crash["vehicles"].append({"category": "car", "quantity": key["TOTAL_VEHICLES"]})
+
+            if key["TOTAL_BICYCLES"] != 0:
+                valid_crash["vehicles"].append({"category": "bike", "quantity": key["TOTAL_BICYCLES"]})
+
+            # TODO persons
+
+            if key["ADDRESS"] != "":
+                valid_crash["address"] = key["ADDRESS"]
+
+            valid_crashes.append(valid_crash)
+
+        else:
+            print("transformation of "+args.destination+" crashes not yet implemented")
+            sys.exit(1)
+
+print("done, {} valid crashes loaded".format(len(valid_crashes)))
+
+# create the output folder if needed so that open() below cannot fail on it
+transformed_path = os.path.join(args.folder, "transformed")
+if not os.path.exists(transformed_path):
+    os.makedirs(transformed_path)
+
+crashes_output = os.path.join(transformed_path, "crashes.json")
+
+with open(crashes_output, "w") as f:
+    json.dump(valid_crashes, f)
+
+print("output written to {}".format(crashes_output))