Skip to content

Commit

Permalink
Merge pull request #3769 from LBNL-UCB-STI/inm/jupyter-map-analysis-u…
Browse files Browse the repository at this point in the history
…pdate

Jupyter: map analysis update
  • Loading branch information
nikolayilyin authored Aug 28, 2023
2 parents 7e8d1f3 + 3aa019c commit 51a423b
Show file tree
Hide file tree
Showing 9 changed files with 731 additions and 546 deletions.
3 changes: 2 additions & 1 deletion docker/jupyter-enhanced/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM jupyter/scipy-notebook

RUN pip install geopandas pandas pygeos boto s3fs shapely gcsfs
RUN pip install p2j pyproj pyrosm
RUN pip install p2j pyproj pyrosm
RUN pip install pyshp
157 changes: 145 additions & 12 deletions jupyter/map_analysis/network_linkstats.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import math\n",
"\n",
"pd.set_option('display.max_rows', 500)\n",
"pd.set_option('display.max_columns', 500)\n",
Expand All @@ -26,10 +27,13 @@
"source": [
"## reading files\n",
"\n",
"network = pd.read_csv(\"../local_files/network.csv.gz\")\n",
"linkstats = pd.read_csv(\"../local_files/b.6.linkstats.csv.gz\")\n",
"network = pd.read_csv(\"../local_files/network.1.csv.gz\")\n",
"# assign the filled column back: an inplace fillna on a selected column is chained assignment\n",
"# and does not reliably update `network` (warns on pandas 2.x, no-op under Copy-on-Write)\n",
"network['attributeOrigType'] = network['attributeOrigType'].fillna(value='UNKNOWN')\n",
"\n",
"network.shape, linkstats.shape"
"linkstats = pd.read_csv(\"../local_files/linkstats.1.csv.gz\")\n",
"\n",
"print(network.shape, linkstats.shape)\n",
"# network['attributeOrigType'].value_counts()"
]
},
{
Expand Down Expand Up @@ -63,6 +67,7 @@
"## assuming files are for the same network and have the same number of links\n",
"\n",
"full_df = linkstats.merge(network, left_on='link', right_on='linkId', how='outer')\n",
"\n",
"full_df.head(2)"
]
},
Expand Down Expand Up @@ -91,11 +96,16 @@
"metadata": {},
"outputs": [],
"source": [
"## calculating the speed and free speed and speed in km/h\n",
"## calculating speed, free speed, speed in km/h, speed in mph\n",
"\n",
"full_df['speed'] = full_df['length'] / full_df['traveltime']\n",
"full_df['speed_km_h'] = full_df['speed'] * 3.6\n",
"\n",
"full_df['freespeed_km_h'] = full_df['freespeed'] * 3.6\n",
"full_df['speed_km_h'] = full_df['speed'] * 3.6\n",
"\n",
"full_df['freespeed_mph'] = full_df['freespeed'] * 2.237\n",
"full_df['speed_mph'] = full_df['speed'] * 2.237\n",
"\n",
"full_df.head(2)"
]
},
Expand All @@ -122,17 +132,26 @@
"source": [
"## description of speeds for each road type\n",
"\n",
"dfs = []\n",
"# dfs = []\n",
"dfs_free = []\n",
"\n",
"road_types = network['attributeOrigType'].unique()\n",
"for road_type in road_types:\n",
" filtered_network = full_df[full_df['attributeOrigType'] == road_type]\n",
" df = filtered_network[['speed', 'freespeed']].describe()\n",
" df.rename(columns={'speed':f'{road_type} speed', 'freespeed':f'{road_type} free speed'}, inplace=True)\n",
" dfs.append(df.transpose())\n",
" \n",
"speed_df = pd.concat(dfs)\n",
"speed_df"
" # df = filtered_network[['speed']].describe()\n",
" # df.rename(columns={'speed':f'{road_type} speed'}, inplace=True)\n",
" # dfs.append(df.transpose())\n",
" \n",
" df = filtered_network[['freespeed']].describe()\n",
" df.rename(columns={'freespeed':f'{road_type} free speed'}, inplace=True)\n",
" dfs_free.append(df.transpose())\n",
" \n",
"# speed_df = pd.concat(dfs)\n",
"# display(speed_df)\n",
"\n",
"free_speed_df = pd.concat(dfs_free)\n",
"display(free_speed_df)"
]
},
{
Expand All @@ -141,12 +160,126 @@
"id": "8c3a9e75-d599-488e-acd8-9a9cc6269e7d",
"metadata": {},
"outputs": [],
"source": [
"## how many of each road type are there with speed less than threshold\n",
"\n",
"grouped_df = full_df.groupby('attributeOrigType')[['linkFreeSpeed']].agg(\n",
" less_than_20=('linkFreeSpeed', lambda gr:gr[gr < 20].count()),\n",
" more_than_20=('linkFreeSpeed', lambda gr:gr[gr >= 20].count())\n",
")\n",
"\n",
"grouped_df.rename({'less_than_20':\"less than 20\", 'more_than_20':\"more than 20\"}, axis='columns', inplace=True)\n",
"\n",
"ax = grouped_df.plot(kind='bar', stacked=True, rot=20, figsize=(20,4))\n",
"\n",
"# if numbers are required on top of bars:\n",
"#\n",
"# for (container, pdd, color) in zip(ax.containers, [0,10], ['blue', 'orange']):\n",
"# ax.bar_label(container, padding=pdd, color=color)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "acebf0fc-cc25-4e83-8576-b1ed78b30a33",
"metadata": {},
"outputs": [],
"source": [
"grouped_df.sort_values('more than 20')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dec9f6ec-45c0-4dc3-8cc1-b747e54ecc2f",
"metadata": {},
"outputs": [],
"source": [
"# overwriteLinkParam file generation\n",
"# for links with speed less than threshold\n",
"\n",
"# expected file header:\n",
"# link_id,capacity,free_speed,length,lanes\n",
"\n",
"threshold_mph = 20\n",
"threshold_ms = 10 # threshold_mph / 2.237\n",
"\n",
"linkId_to_values = {}\n",
"\n",
"links_with_speed_less_than_threshold = set(linkstats2[linkstats2['freespeed'] < threshold_ms]['link'].unique())\n",
"print(f\"there are {len(links_with_speed_less_than_threshold)} links with free speed less than {threshold_ms} meters per second.\")\n",
"\n",
"selected_columns = ['linkId','linkCapacity','linkFreeSpeed','linkLength','numberOfLanes','attributeOrigType']\n",
"# NOTE(review): network2 is not defined in the cells visible here - confirm it is loaded earlier in the notebook\n",
"column_renames = {\n",
"    'linkId': 'link_id',\n",
"    'linkCapacity': 'capacity',\n",
"    'linkFreeSpeed': 'free_speed',\n",
"    'linkLength': 'length',\n",
"    'numberOfLanes': 'lanes',\n",
"    'attributeOrigType': 'road_type',\n",
"}\n",
"is_slow_link = network2['linkId'].isin(links_with_speed_less_than_threshold)\n",
"\n",
"# .loc + non-inplace rename builds an independent frame, so the later\n",
"# df['free_speed'] assignment does not hit SettingWithCopyWarning\n",
"df = network2.loc[is_slow_link, selected_columns].rename(columns=column_renames)\n",
"\n",
"\n",
"def get_mean_speed(row):\n",
"    \"\"\"Return the replacement free speed for one link row.\n",
"\n",
"    Looks up the row's 'road_type' in `road_type2speed`. Rows with a missing\n",
"    road type, or a road type without a usable entry, fall back to the\n",
"    'unclassified' speed so that no link ends up with an empty free_speed.\n",
"    \"\"\"\n",
"    fallback_speed = road_type2speed.get('unclassified')\n",
"    road_type = row['road_type']\n",
"\n",
"    # missing road type (None, empty string or NaN) -> use the fallback\n",
"    if not road_type or str(road_type) == 'nan':\n",
"        return fallback_speed\n",
"\n",
"    mean_speed = road_type2speed.get(road_type)\n",
"    if mean_speed:\n",
"        return mean_speed\n",
"\n",
"    # unknown road type: report it, then fall back instead of implicitly\n",
"    # returning None (which would become NaN in the free_speed column)\n",
"    print(road_type)\n",
"    return fallback_speed\n",
"\n",
" \n",
"# ax = df['free_speed'].hist(bins=30, label='before', alpha=0.4)\n",
"df['free_speed'] = df.apply(get_mean_speed, axis=1)\n",
"# df['free_speed'].hist(bins=30, label='after', ax=ax, alpha=0.4)\n",
"# ax.legend()\n",
"\n",
"display(df['road_type'].value_counts(dropna=False))\n",
"display(df.head())\n",
"# index=False keeps the header exactly as expected: link_id,capacity,free_speed,length,lanes\n",
"df[['link_id','capacity','free_speed','length','lanes']].to_csv('../local_files/overwriteLinkParamFile.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba2cdeeb-20c5-4d55-914d-ebc4a0ec9b21",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc09b33c-5a14-41f4-b3fe-72eb5fbc28c7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6d41c7c-9d73-4d82-81d8-af0d7e8f0e30",
"id": "9c8906ff-8e9c-418d-813c-2138fdd7ccfc",
"metadata": {},
"outputs": [],
"source": [
"dd = {\n",
" 'l1' : [1,2,3] * 10,\n",
" 'l2' : list(range(12,12 + 15)) * 2\n",
"}\n",
"\n",
"df = pd.DataFrame.from_dict(dd)\n",
"display(df.head())\n",
"\n",
"df.groupby('l1')[['l2']].agg(l3=('l2', lambda gr: gr[gr >= 16].count()), l4=('l2', lambda gr: gr[gr < 16].count()) )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df98f6c0-d777-45b5-9172-faf0410d2ac6",
"metadata": {},
"outputs": [],
"source": []
Expand Down
Loading

0 comments on commit 51a423b

Please sign in to comment.