Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data standards #95

Merged
merged 13 commits into from
Apr 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions data_standards/concerns-instance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[
{
"id": 67658,
"dateCreated": "2016-02-12T21:22:30.000Z",
"status": "Unassigned",
"tags": ["poorSignange", "driversIgnoreSignage"],
"location": {
"latitude": 42.341146333981548,
"longitude": -71.07689857738815
},
"address": "685 Tremont Street, Boston",
"summary": "Drivers do not stop for pedestrians in the designated crosswalks"
},
{
"id": 17674,
"dateCreated": "2016-01-22T17:01:16.000Z",
"status": "Unassigned",
"tags": ["driversIgnoreSignage"],
"location": {
"latitude": 42.346418898652274,
"longitude": -71.105465175899695
},
"address": "901 Beacon Street, Boston",
"summary": "Drivers in the Left turn only lane will go straight and cause accidents with drivers in the adjacent lane who also go straight in the same lane"
},
{
"id": 17452,
"dateCreated": "2016-01-22T21:20:57.000Z",
"status": "Unassigned",
"tags": ["driversIgnoringSignage", "misuseOfBusLane"],
"location": {
"latitude": 42.352435140312387,
"longitude": -71.061978521716284
},
"address": "20 Essex Street, Boston",
"summary": "Too many drivers are using the bus lane."
}
]
70 changes: 70 additions & 0 deletions data_standards/concerns-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"title": "Concerns",
"description": "Defines the structure of a set of concerns",
"type": "array",
"items": {
"title": "Concern",
"description": "Defines the structure of a concern",
"type": "object",
"properties": {
"source": {
"description": "Source of concern",
"type": "string",
"enum": ["seeclickfix", "visionzero"]
},
"id": {
"description": "Unique identifier of concern",
"type": "number"
},
"dateCreated": {
"description": "Date concern was created, ISO8601 formatted",
"type": "string",
"format": "date-time"
},
"dateResolved": {
"description": "Date concern was resolved, ISO8601 formatted",
"format": "date-time"
},
"status": {
"description": "Status of concern",
"type": "string"
},
"category": {
"description": "Primary category of concern",
"type": "string"
},
"subCategories": {
"description": "Subcategories of concern",
"type": "array",
"items": {
"type": "string"
},
"uniqueItems": true
},
"location": {
"description": "Coordinates of concern, WGS84 formatted",
"type": "object",
"properties": {
"latitude": {
"description": "Latitude of concern",
"type": "number"
},
"longitude": {
"description": "Longitude of concern",
"type": "number"
}
}
},
"address": {
"description": "Address of concern",
"type": "string"
},
"summary": {
"description": "Summary of concern",
"type": "string"
}
},
"required": ["id", "source", "dateCreated", "status", "category", "location"]
}
}
41 changes: 41 additions & 0 deletions data_standards/crashes-instance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
[
{
"id": 1403832,
"dateOccurred": "2016-01-01T00:56:45-05:00",
"vehicles": [
{ "category": "car" }
],
"location": {
"latitude": 42.300864811284519,
"longitude": -71.071316786983303
},
"address": "14 Corona Street",
"summary": "REPORTED INJURIES (P) (E) (F)"
},
{
"id": 1410434,
"dateOccurred": "2016-01-04T15:11:11-05:00",
"type": "pedestrian",
"persons": [
{ "category": "pedestrian", "quantity": 1 }
],
"location": {
"latitude": 42.332547160943271,
"longitude": -71.072124196868316
},
"summary": "PEDESTRIAN STRUCK (P) (E) (F)"
},
{
"id": 1410816,
"dateOccurred": "2016-01-04T18:42:24-05:00",
"vehicles": [
{ "category": "car" }
],
"location": {
"latitude": 42.311376108568268,
"longitude": -71.081614220307372
},
"address": "67 Brunswick Street, Boston",
"summary": "UNKNOWN IF INJURIES - ADVISE NEED FOR EMS (P) (E) (F)"
}
]
94 changes: 94 additions & 0 deletions data_standards/crashes-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"title": "Crashes",
"description": "Defines the structure of a set of crashes",
"type": "array",
"items": {
"title": "Crash",
"description": "Defines the structure of a crash",
"type": "object",
"properties": {
"id": {
"description": "Unique identifier of crash",
"type": "number"
},
"dateOccurred": {
"description": "Date crash occurred, ISO8601 formatted",
"type": "string",
"format": "date-time"
},
"location": {
"description": "Coordinates of crash, WGS84 formatted",
"type": "object",
"properties": {
"latitude": {
"description": "Latitude of crash",
"type": "number"
},
"longitude": {
"description": "Longitude of crash",
"type": "number"
}
}
},
"vehicles": {
"description": "Vehicles involved in crash",
"type": "array",
"items": {
"title": "Vehicle",
"description": "Defines the structure of a vehicle",
"type": "object",
"properties": {
"category": {
"description": "Category of vehicle",
"type": "string",
"enum": ["car", "bike", "taxi", "bus", "truck"]
},
"quantity": {
"description": "Quantity of vehicles of this category",
"type": "number"
}
},
"required": ["category"]
},
"uniqueItems": true
},
"persons": {
"description": "Persons involved in crash",
"type": "array",
"items": {
"title": "Person",
"description": "Defines the structure of a person",
"type": "object",
"properties": {
"category": {
"description": "Category of person",
"type": "string",
"enum": ["driver", "pedestrian", "cyclist", "other"]
},
"quantity": {
"description": "Quantity of persons",
"type": "number"
},
"injuryType": {
"description": "Type of injury",
"type": "string",
"enum": ["minor", "major", "fatal", "unknown"]
}
},
"required": ["category"]
},
"uniqueItems": true
},
"address": {
"description": "Address of crash",
"type": "string"
},
"summary": {
"description": "Summary of crash",
"type": "string"
}
},
"required": ["id", "dateOccurred", "location"]
}
}
139 changes: 139 additions & 0 deletions src/data_transformation/transform_concerns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Transform a concerns CSV into compatible JSON document.
# Author terryf82 https://github.com/terryf82

import argparse
import dateutil.parser as date_parser
import json
import os
import pandas as pd
from collections import OrderedDict

# Command-line interface: the destination (city) being processed and the
# folder that holds its "raw" input files.
parser = argparse.ArgumentParser()
# Both options are required: without --folder the path join below fails on
# None, and without --destination no transformation branch ever matches, so
# the script would silently write an empty result.
parser.add_argument("-d", "--destination", type=str, required=True,
                    help="destination name")
parser.add_argument("-f", "--folder", type=str, required=True,
                    help="absolute path to destination folder")

args = parser.parse_args()

# Raw input files are expected in <folder>/raw; it must be a directory
# because its contents are listed later on.
raw_path = os.path.join(args.folder, "raw")
if not os.path.isdir(raw_path):
    print(raw_path+" not found, exiting")
    # SystemExit works even when the site-provided exit() helper is absent.
    raise SystemExit(1)

# Accumulator for every concern that passes validation.
valid_concerns = []
# Counter used to mint ids for sources that do not supply their own.
manual_concern_id = 1

print("searching "+raw_path+" for raw concerns file(s)")

# Read every file found in the raw folder and convert each usable row into a
# concern record (id, dateCreated, status, category, location and, when
# present, summary). Rows missing a creation date or category are skipped.
for csv_file in os.listdir(raw_path):
    print(csv_file)

    # na_filter=False is passed so that empty cells can be compared with ""
    # in the checks below.
    df_concerns = pd.read_csv(os.path.join(raw_path, csv_file), na_filter=False)
    dict_concerns = df_concerns.to_dict("records")

    for key in dict_concerns:
        if args.destination == "boston":
            # Boston presently has concerns from two sources - VisionZero and SeeClickFix
            if csv_file == "Vision_Zero_Entry.csv":
                # skip concerns that don't have a date or request type
                if key["REQUESTDATE"] == "" or key["REQUESTTYPE"] == "":
                    continue

                valid_concern = OrderedDict([
                    ("id", key["OBJECTID"]),
                    ("source", "visionzero"),
                    ("dateCreated", key["REQUESTDATE"]),
                    ("status", key["STATUS"]),
                    ("category", key["REQUESTTYPE"]),
                    ("location", OrderedDict([
                        ("latitude", key["Y"]),
                        ("longitude", key["X"])
                    ]))
                ])

                # only add summary property if data exists
                if key["COMMENTS"] != "":
                    valid_concern["summary"] = key["COMMENTS"]

            elif csv_file == "bos_scf.csv":
                # skip concerns that don't have a date or summary
                if key["created"] == "" or key["summary"] == "":
                    continue

                # this source has no id column, so one is generated
                valid_concern = OrderedDict([
                    ("id", manual_concern_id),
                    ("source", "seeclickfix"),
                    ("dateCreated", key["created"]),
                    ("status", "unknown"),
                    ("category", key["summary"]),
                    ("location", OrderedDict([
                        ("latitude", key["Y"]),
                        ("longitude", key["X"])
                    ]))
                ])

                # only add summary property if data exists
                if key["description"] != "":
                    valid_concern["summary"] = key["description"]

            else:
                # Unrecognised file: no concern was built for this row, so
                # there is nothing to append (previously this reused a stale
                # record or raised NameError).
                continue

            valid_concerns.append(valid_concern)
            # Advanced for every accepted Boston concern, whichever source
            # it came from.
            manual_concern_id += 1

        elif args.destination == "dc":
            # skip concerns that don't have a date or request type
            if key["REQUESTDATE"] == "" or key["REQUESTTYPE"] == "":
                continue

            # NOTE(review): concerns-schema.json lists "source" as required,
            # but no source value is emitted for this destination - confirm.
            valid_concern = OrderedDict([
                ("id", key["OBJECTID"]),
                ("dateCreated", key["REQUESTDATE"]),
                ("status", key["STATUS"]),
                ("category", key["REQUESTTYPE"]),
                ("location", OrderedDict([
                    ("latitude", key["Y"]),
                    ("longitude", key["X"])
                ]))
            ])

            # only add summary property if data exists
            if key["COMMENTS"] != "":
                valid_concern["summary"] = key["COMMENTS"]

            valid_concerns.append(valid_concern)

        elif args.destination == "cambridge":
            # skip concerns that don't have a date or issue type
            if key["ticket_created_date_time"] == "" or key["issue_type"] == "":
                continue

            # isoformat() yields the "T"-separated ISO 8601 form; str() would
            # put a space between date and time.
            created = date_parser.parse(key["ticket_created_date_time"])
            date_created = created.isoformat()
            if created.utcoffset() is None:
                # Naive timestamp: append the fixed -05:00 offset.
                # NOTE(review): a constant offset ignores daylight saving
                # time - confirm this is acceptable for the data.
                date_created += "-05:00"

            # NOTE(review): concerns-schema.json lists "source" as required,
            # but no source value is emitted for this destination - confirm.
            valid_concern = OrderedDict([
                ("id", key["ticket_id"]),
                ("dateCreated", date_created),
                ("status", key["ticket_status"]),
                ("category", key["issue_type"]),
                ("location", OrderedDict([
                    ("latitude", key["lat"]),
                    ("longitude", key["lng"])
                ]))
            ])

            # only add summary property if data exists
            if key["issue_description"] != "":
                valid_concern["summary"] = key["issue_description"]

            valid_concerns.append(valid_concern)

# Report how many concerns were collected, then serialise them as a single
# JSON array to <folder>/transformed/concerns.json.
print("done, {} valid concerns loaded".format(len(valid_concerns)))

# Create the output folder when it is missing; open() would otherwise fail.
transformed_path = os.path.join(args.folder, "transformed")
if not os.path.isdir(transformed_path):
    os.makedirs(transformed_path)

concerns_output = os.path.join(transformed_path, "concerns.json")

with open(concerns_output, "w") as f:
    json.dump(valid_concerns, f)

print("output written to {}".format(concerns_output))
Loading