Skip to content

Commit

Permalink
Merge pull request #95 from Data4Democracy/data_standards
Browse files Browse the repository at this point in the history
Data standards
  • Loading branch information
terryf82 authored Apr 2, 2018
2 parents 479f051 + 25e04bf commit 505c4cf
Show file tree
Hide file tree
Showing 6 changed files with 535 additions and 0 deletions.
38 changes: 38 additions & 0 deletions data_standards/concerns-instance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[
{
"id": 67658,
"dateCreated": "2016-02-12T21:22:30.000Z",
"status": "Unassigned",
"tags": ["poorSignage", "driversIgnoreSignage"],
"location": {
"latitude": 42.341146333981548,
"longitude": -71.07689857738815
},
"address": "685 Tremont Street, Boston",
"summary": "Drivers do not stop for pedestrians in the designated crosswalks"
},
{
"id": 17674,
"dateCreated": "2016-01-22T17:01:16.000Z",
"status": "Unassigned",
"tags": ["driversIgnoreSignage"],
"location": {
"latitude": 42.346418898652274,
"longitude": -71.105465175899695
},
"address": "901 Beacon Street, Boston",
"summary": "Drivers in the Left turn only lane will go straight and cause accidents with drivers in the adjacent lane who also go straight in the same lane"
},
{
"id": 17452,
"dateCreated": "2016-01-22T21:20:57.000Z",
"status": "Unassigned",
"tags": ["driversIgnoreSignage", "misuseOfBusLane"],
"location": {
"latitude": 42.352435140312387,
"longitude": -71.061978521716284
},
"address": "20 Essex Street, Boston",
"summary": "Too many drivers are using the bus lane."
}
]
70 changes: 70 additions & 0 deletions data_standards/concerns-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"title": "Concerns",
"description": "Defines the structure of a set of concerns",
"type": "array",
"items": {
"title": "Concern",
"description": "Defines the structure of a concern",
"type": "object",
"properties": {
"source": {
"description": "Source of concern",
"type": "string",
"enum": ["seeclickfix", "visionzero"]
},
"id": {
"description": "Unique identifier of concern",
"type": "number"
},
"dateCreated": {
"description": "Date concern was created, ISO8601 formatted",
"type": "string",
"format": "date-time"
},
"dateResolved": {
"description": "Date concern was resolved, ISO8601 formatted",
"type": "string",
"format": "date-time"
},
"status": {
"description": "Status of concern",
"type": "string"
},
"category": {
"description": "Primary category of concern",
"type": "string"
},
"subCategories": {
"description": "Subcategories of concern",
"type": "array",
"items": {
"type": "string"
},
"uniqueItems": true
},
"location": {
"description": "Coordinates of concern, WGS84 formatted",
"type": "object",
"properties": {
"latitude": {
"description": "Latitude of concern",
"type": "number"
},
"longitude": {
"description": "Longitude of concern",
"type": "number"
}
}
},
"address": {
"description": "Address of concern",
"type": "string"
},
"summary": {
"description": "Summary of concern",
"type": "string"
}
},
"required": ["id", "source", "dateCreated", "status", "category", "location"]
}
}
41 changes: 41 additions & 0 deletions data_standards/crashes-instance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
[
{
"id": 1403832,
"dateOccurred": "2016-01-01T00:56:45-05:00",
"vehicles": [
{ "category": "car" }
],
"location": {
"latitude": 42.300864811284519,
"longitude": -71.071316786983303
},
"address": "14 Corona Street",
"summary": "REPORTED INJURIES (P) (E) (F)"
},
{
"id": 1410434,
"dateOccurred": "2016-01-04T15:11:11-05:00",
"type": "pedestrian",
"persons": [
{ "category": "pedestrian", "quantity": 1 }
],
"location": {
"latitude": 42.332547160943271,
"longitude": -71.072124196868316
},
"summary": "PEDESTRIAN STRUCK (P) (E) (F)"
},
{
"id": 1410816,
"dateOccurred": "2016-01-04T18:42:24-05:00",
"vehicles": [
{ "category": "car" }
],
"location": {
"latitude": 42.311376108568268,
"longitude": -71.081614220307372
},
"address": "67 Brunswick Street, Boston",
"summary": "UNKNOWN IF INJURIES - ADVISE NEED FOR EMS (P) (E) (F)"
}
]
94 changes: 94 additions & 0 deletions data_standards/crashes-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"title": "Crashes",
"description": "Defines the structure of a set of crashes",
"type": "array",
"items": {
"title": "Crash",
"description": "Defines the structure of a crash",
"type": "object",
"properties": {
"id": {
"description": "Unique identifier of crash",
"type": "number"
},
"dateOccurred": {
"description": "Date crash occurred, ISO8601 formatted",
"type": "string",
"format": "date-time"
},
"location": {
"description": "Coordinates of crash, WGS84 formatted",
"type": "object",
"properties": {
"latitude": {
"description": "Latitude of crash",
"type": "number"
},
"longitude": {
"description": "Longitude of crash",
"type": "number"
}
}
},
"vehicles": {
"description": "Vehicles involved in crash",
"type": "array",
"items": {
"title": "Vehicle",
"description": "Defines the structure of a vehicle",
"type": "object",
"properties": {
"category": {
"description": "Category of vehicle",
"type": "string",
"enum": ["car", "bike", "taxi", "bus", "truck"]
},
"quantity": {
"description": "Quantity of vehicles of this category",
"type": "number"
}
},
"required": ["category"]
},
"uniqueItems": true
},
"persons": {
"description": "Persons involved in crash",
"type": "array",
"items": {
"title": "Person",
"description": "Defines the structure of a person",
"type": "object",
"properties": {
"category": {
"description": "Category of person",
"type": "string",
"enum": ["driver", "pedestrian", "cyclist", "other"]
},
"quantity": {
"description": "Quantity of persons",
"type": "number"
},
"injuryType": {
"description": "Type of injury",
"type": "string",
"enum": ["minor", "major", "fatal", "unknown"]
}
},
"required": ["category"]
},
"uniqueItems": true
},
"address": {
"description": "Address of crash",
"type": "string"
},
"summary": {
"description": "Summary of crash",
"type": "string"
}
},
"required": ["id", "dateOccurred", "location"]
}
}
139 changes: 139 additions & 0 deletions src/data_transformation/transform_concerns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Transform a concerns CSV into compatible JSON document.
# Author terryf82 https://github.com/terryf82

import argparse
import dateutil.parser as date_parser
import json
import os
import pandas as pd
from collections import OrderedDict

# Raw Boston file names mapped to the "source" value recorded on each concern.
# Boston presently has concerns from two sources - VisionZero and SeeClickFix.
BOSTON_SOURCES = {
    "Vision_Zero_Entry.csv": "visionzero",
    "bos_scf.csv": "seeclickfix",
}


def _make_concern(concern_id, date_created, status, category,
                  latitude, longitude, summary, source=None):
    """Assemble one standardized concern as an OrderedDict.

    Key order is id, [source], dateCreated, status, category, location,
    [summary]. "source" is emitted only when a value is given and "summary"
    only when the raw value is not the empty string.
    """
    fields = [("id", concern_id)]
    if source is not None:
        fields.append(("source", source))
    fields.extend([
        ("dateCreated", date_created),
        ("status", status),
        ("category", category),
        ("location", OrderedDict([
            ("latitude", latitude),
            ("longitude", longitude),
        ])),
    ])
    concern = OrderedDict(fields)

    # only add summary property if data exists
    if summary != "":
        concern["summary"] = summary
    return concern


def transform_concern(destination, csv_file, record, manual_concern_id=1):
    """Convert one raw CSV row into a standardized concern.

    Args:
        destination: destination name ("boston", "dc" or "cambridge").
        csv_file: base name of the raw file the row came from; only
            consulted for "boston", where it selects the source format.
        record: dict for a single CSV row, with empty cells as "".
        manual_concern_id: id assigned to Boston SeeClickFix rows, which
            take their id from this counter rather than from a column.

    Returns:
        An OrderedDict describing the concern, or None when the row lacks a
        date or category, or when the destination / Boston file name is not
        one this script knows how to transform.
    """
    if destination == "boston":
        if csv_file == "Vision_Zero_Entry.csv":
            # skip concerns that don't have a date or request type
            if record["REQUESTDATE"] == "" or record["REQUESTTYPE"] == "":
                return None
            return _make_concern(
                record["OBJECTID"], record["REQUESTDATE"], record["STATUS"],
                record["REQUESTTYPE"], record["Y"], record["X"],
                record["COMMENTS"], source=BOSTON_SOURCES[csv_file])

        if csv_file == "bos_scf.csv":
            # skip concerns that don't have a date or summary
            if record["created"] == "" or record["summary"] == "":
                return None
            return _make_concern(
                manual_concern_id, record["created"], "unknown",
                record["summary"], record["Y"], record["X"],
                record["description"], source=BOSTON_SOURCES[csv_file])

        # Unrecognized Boston file: there is no mapping for its columns, so
        # produce nothing instead of reusing a concern from an earlier row.
        return None

    # NOTE(review): the concerns schema added in the same commit lists
    # "source" as required, but dc and cambridge records are written without
    # one - confirm the intended value before adding it here.
    if destination == "dc":
        # skip concerns that don't have a date or request type
        if record["REQUESTDATE"] == "" or record["REQUESTTYPE"] == "":
            return None
        return _make_concern(
            record["OBJECTID"], record["REQUESTDATE"], record["STATUS"],
            record["REQUESTTYPE"], record["Y"], record["X"],
            record["COMMENTS"])

    if destination == "cambridge":
        # skip concerns that don't have a date or issue type
        if (record["ticket_created_date_time"] == ""
                or record["issue_type"] == ""):
            return None
        # NOTE(review): str() of a datetime separates date and time with a
        # space, not "T", and the fixed "-05:00" suffix ignores daylight
        # saving - kept as-is so existing output does not change; confirm
        # whether consumers need strict ISO8601 here.
        created = str(date_parser.parse(
            record["ticket_created_date_time"])) + "-05:00"
        return _make_concern(
            record["ticket_id"], created, record["ticket_status"],
            record["issue_type"], record["lat"], record["lng"],
            record["issue_description"])

    return None


def main(argv=None):
    """Read every raw concerns CSV for a destination and write concerns.json.

    Args:
        argv: optional list of command-line arguments; defaults to
            sys.argv[1:] when None (argparse behaviour).

    Raises:
        SystemExit: with status 1 when <folder>/raw does not exist, or from
            argparse when the arguments are invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--destination", type=str,
                        help="destination name")
    # The folder is needed to locate both input and output, so fail with a
    # usage message instead of a traceback from os.path.join(None, ...).
    parser.add_argument("-f", "--folder", type=str, required=True,
                        help="absolute path to destination folder")
    args = parser.parse_args(argv)

    raw_path = os.path.join(args.folder, "raw")
    if not os.path.exists(raw_path):
        print(raw_path + " not found, exiting")
        raise SystemExit(1)

    valid_concerns = []
    manual_concern_id = 1

    print("searching " + raw_path + " for raw concerns file(s)")

    for csv_file in os.listdir(raw_path):
        print(csv_file)

        if args.destination == "boston" and csv_file not in BOSTON_SOURCES:
            # Nothing can be extracted from this file, so don't parse it.
            print("skipping " + csv_file + ", not a known boston source")
            continue

        # na_filter=False keeps empty cells as "" so the checks above work.
        df_concerns = pd.read_csv(os.path.join(raw_path, csv_file),
                                  na_filter=False)

        for record in df_concerns.to_dict("records"):
            concern = transform_concern(args.destination, csv_file, record,
                                        manual_concern_id)
            if concern is None:
                continue
            valid_concerns.append(concern)
            if args.destination == "boston":
                # The counter advances for every Boston concern kept,
                # whichever source it came from.
                manual_concern_id += 1

    print("done, {} valid concerns loaded".format(len(valid_concerns)))

    concerns_output = os.path.join(args.folder, "transformed/concerns.json")

    # Create the output folder on first run instead of failing in open().
    output_dir = os.path.dirname(concerns_output)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    with open(concerns_output, "w") as f:
        json.dump(valid_concerns, f)

    print("output written to {}".format(concerns_output))


if __name__ == "__main__":
    main()
Loading

0 comments on commit 505c4cf

Please sign in to comment.