From ad0c8c33e5cd0eb2132e786feb790fc98e2a5ee0 Mon Sep 17 00:00:00 2001 From: Nikolay Ilyin Date: Thu, 31 Aug 2023 10:16:48 +0300 Subject: [PATCH 1/5] using named parameter --- .../scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala b/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala index 4579aef3a6b..ec44923851b 100755 --- a/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala +++ b/src/main/scala/beam/agentsim/agents/modalbehaviors/ChoosesMode.scala @@ -1012,7 +1012,7 @@ trait ChoosesMode { beamServices.geo.wgs2Utm(legs.head.travelPath.startPoint.loc), legs.head.startTime, beamServices.geo.wgs2Utm(legs.last.travelPath.endPoint.loc), - wheelchairUser, + withWheelchair = wheelchairUser, requestTime = _currentTick, requester = self, rideHailServiceSubscription = attributes.rideHailServiceSubscription, From 030bb08256ed25b363621bd8ab5fe71feef82b62 Mon Sep 17 00:00:00 2001 From: Nikolay Ilyin Date: Mon, 4 Sep 2023 16:56:43 +0300 Subject: [PATCH 2/5] boto3 added --- docker/jupyter-enhanced/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/jupyter-enhanced/Dockerfile b/docker/jupyter-enhanced/Dockerfile index 79cf3455264..12a07b71f81 100644 --- a/docker/jupyter-enhanced/Dockerfile +++ b/docker/jupyter-enhanced/Dockerfile @@ -2,4 +2,5 @@ FROM jupyter/scipy-notebook RUN pip install geopandas pandas pygeos boto s3fs shapely gcsfs RUN pip install p2j pyproj pyrosm -RUN pip install pyshp \ No newline at end of file +RUN pip install pyshp +RUN pip install boto3 \ No newline at end of file From 307db4f9795ef6684e0061c25dfdcf83ee0a9e97 Mon Sep 17 00:00:00 2001 From: Nikolay Ilyin Date: Tue, 5 Sep 2023 16:09:18 +0300 Subject: [PATCH 3/5] speeds analysis for physsim network --- .../physsim_network_example.ipynb | 158 ++++++++++++++---- .../map_analysis/physsim_network_example.py | 140 +++++++++++++--- 2 files changed, 244 insertions(+), 54 deletions(-) diff --git a/jupyter/map_analysis/physsim_network_example.ipynb b/jupyter/map_analysis/physsim_network_example.ipynb index 804bb6e3dc3..1818c6cec29 100644 --- a/jupyter/map_analysis/physsim_network_example.ipynb +++ b/jupyter/map_analysis/physsim_network_example.ipynb @@ -21,7 +21,32 @@ "\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_columns', 500)\n", - "pd.set_option('display.width', 1000)" + "pd.set_option('display.width', 1000)\n", + "pd.set_option('max_colwidth', None)\n", + "\n", + "\n", + "unspecified_link_type_name = 'unspecified'\n", + "\n", + "def get_direction(row):\n", + " d_x = 'N'\n", + " d_y = 'E'\n", + " if row['fromLocationX'] > row['toLocationX']:\n", + " d_x = 'S'\n", + " if row['fromLocationY'] > row['toLocationY']:\n", + " d_y = 'W'\n", + " return d_x + d_y\n", + "\n", + "def read_network(file_path):\n", + " df = pd.read_csv(file_path)\n", + "\n", + " df['direction'] = df.apply(get_direction, axis=1)\n", + " df['freespeed_mph'] = df['linkFreeSpeed'] * 2.237\n", + " df['attributeOrigType'].fillna(value=unspecified_link_type_name, inplace=True)\n", + "\n", + " df[\"attributeOrigId\"] = df['attributeOrigId'].astype(str)\n", + " df[\"attributeOrigId\"] = pd.to_numeric(df[\"attributeOrigId\"], errors='coerce').fillna(0).astype('Int64')\n", + " \n", + " return df" ] }, { @@ -35,25 +60,22 @@ "source": [ "# reading network.csv.gz file from simulation output folder\n", "\n", - "dtype = { 'attributeOrigId': str }\n", - "out_network = pd.read_csv(\"../beam_root/output/sf-light/sf-light-0.5k__2023-06-15_16-11-49_obh/network.csv.gz\", dtype=dtype)\n", - "out_network[\"str_id\"] = out_network['attributeOrigId'].astype(str)\n", - "out_network[\"id\"] = pd.to_numeric(out_network[\"str_id\"], errors='coerce').fillna(0).astype('Int64')\n", - "\n", - "# out_network.drop(columns=['str_id', 'attributeOrigId'], inplace=True)\n", - "out_network.head(3)" + "network = read_network(\"../local_files/sfbay-baseline-20230815-30pct-convergence_beam_year-2018-iteration--1_network.csv.gz\")\n", + "network.head(2)" ] }, { "cell_type": "code", "execution_count": null, - "id": "1ca82ecd-48b5-46af-8815-d44b22cb1e56", + "id": "4857212d-1c76-4348-93a0-176d99127603", "metadata": {}, "outputs": [], "source": [ "# converting CRS of output network to lat-lon CRS epsg:4326 and creating a geometry for new geopandas data frame\n", "\n", - "crs_to = CRS.from_epsg(4326) # the lat lon CRS\n", + "new_crs = 4326\n", + "\n", + "crs_to = CRS.from_epsg(new_crs) # the lat lon CRS\n", "crs_from = CRS.from_epsg(26910) # sf crs\n", "transformer = Transformer.from_crs(crs_from, crs_to)\n", "\n", @@ -63,22 +85,79 @@ " mls = MultiLineString([[[from_y, from_x], [to_y, to_x]]])\n", " return mls\n", " \n", - "out_geometry = out_network.apply(out_row_to_geometry, axis=1)\n", - "out_geometry.head(2)" + "geometry = network.apply(out_row_to_geometry, axis=1)\n", + "geometry.head(2)" ] }, { "cell_type": "code", "execution_count": null, - "id": "e37337a3-3f21-48e1-9b0f-73a273188294", + "id": "89cc417c-8d01-419a-a065-b70190c594ff", "metadata": {}, "outputs": [], "source": [ "# creating geopandas data frame\n", "\n", - "out_network = gpd.GeoDataFrame(out_network, crs='epsg:4326', geometry=out_geometry)\n", - "# out_network.drop(columns=['fromLocationX', 'fromLocationY', 'toLocationX', 'toLocationY'], inplace=True)\n", - "out_network.head(2)" + "geo_df = gpd.GeoDataFrame(network, crs=f'epsg:{new_crs}', geometry=geometry)\n", + "display(geo_df.head(2))\n", + "\n", + "# saving GeoDataFrame as shape file\n", + "# geo_df.to_file(\"../local_files/sfbay-baseline-20230815-30pct-convergence_beam_year-2018-iteration--1_network.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c477309e-d081-4071-bccf-401449afa99a", + "metadata": {}, + "outputs": [], + "source": [ + "## how many of each road type are there with speed less than threshold\n", + "\n", + "speed_field = 'freespeed_mph'\n", + "threshold = 20\n", + "moreX_name = f\"more than {threshold} mph\"\n", + "lessX_name = f\"less than {threshold} mph\"\n", + "\n", + "grouped_df = network.groupby('attributeOrigType')[[speed_field]].agg(\n", + " less_than_X=(speed_field, lambda gr:gr[gr < threshold].count()),\n", + " more_than_X=(speed_field, lambda gr:gr[gr >= threshold].count())\n", + ")\n", + "\n", + "grouped_df.rename({'less_than_X':lessX_name, 'more_than_X':moreX_name}, axis='columns', inplace=True)\n", + "\n", + "ax = grouped_df.plot(kind='bar', stacked=False, rot=0, figsize=(20,4))\n", + "ax.set_xlabel(\"\")\n", + "\n", + "# plt.savefig('../local_files/vta-beam-network/link_speed_graph.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53ae2b66-1dd4-4fd1-8b6a-7992590f91f3", + "metadata": {}, + "outputs": [], + "source": [ + "## description of speeds for each road type\n", + "\n", + "dfs = []\n", + "\n", + "road_types = network['attributeOrigType'].unique()\n", + "for road_type in road_types:\n", + " filtered_network = network[network['attributeOrigType'] == road_type]\n", + " \n", + " df = filtered_network[['freespeed_mph']].describe()\n", + " df.rename(columns={'freespeed_mph':f'{road_type}'}, inplace=True)\n", + " dfs.append(df.transpose())\n", + " \n", + "speed_df = pd.concat(dfs)\n", + "\n", + "speed_description = grouped_df.merge(speed_df[['mean']], left_index=True, right_index=True, how='outer')\n", + "speed_description.sort_values('mean', ascending=False, inplace=True)\n", + "speed_description.rename(columns={'mean':'mean speed in mph'}, inplace=True)\n", + "\n", + "speed_description.style.set_properties(**{'width': '150px'})" ] }, { @@ -90,19 +169,16 @@ "source": [ "# filtering and plotting\n", "\n", - "fig, axs = plt.subplots(1, 3, figsize=(13, 5), dpi=100, subplot_kw={'aspect': 1})\n", - "fig.tight_layout(pad=5.0)\n", + "fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1})\n", "\n", - "filtered_1 = out_network[(out_network['id'] > 4 * 10e7)]\n", - "filtered_2 = out_network[out_network['id'] == 0]\n", - "print(f\"There are {len(filtered_1)} and {len(filtered_2)} lines in filtered network.\")\n", + "filtered_1 = out_network[(out_network['attributeOrigType'] == unspecified_link_type_name)]\n", + "filtered_2 = out_network[(out_network['attributeOrigType'] != unspecified_link_type_name)]\n", "\n", - "filtered_1.plot(ax=axs[0])\n", - "filtered_2.plot(ax=axs[1])\n", - "filtered_1['linkLength'].hist(ax=axs[2], bins=20, color='green', alpha=0.4, label='filtered')\n", - "filtered_2['linkLength'].hist(ax=axs[2], bins=20, color='blue', alpha=0.4, label='with OSM ID 0')\n", + "filtered_1.plot(ax=ax, label=f'unspecified ({len(filtered_1)} links)', color='red', lw=0.6)\n", + "filtered_2.plot(ax=ax, label=f'the rest of ({len(filtered_2)} links)', color='blue', lw=0.2)\n", "\n", - "axs[2].legend()" + "ax.legend()\n", + "plt.savefig('../local_files/vta-beam-network/specified_vs_unspecified_links.png')" ] }, { @@ -112,9 +188,18 @@ "metadata": {}, "outputs": [], "source": [ - "# save the network as shape file\n", + "# filtering and plotting\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1})\n", + "\n", + "filtered_1 = out_network[(out_network['attributeOrigType'] != unspecified_link_type_name) & (out_network['freespeed_mph'] < 20)]\n", + "filtered_2 = out_network[(out_network['attributeOrigType'] != unspecified_link_type_name) & (out_network['freespeed_mph'] >= 20)]\n", "\n", - "out_network.to_file(\"output_network.shp\")" + "filtered_1.plot(ax=ax, label=f'specified link types and free speed < 20 mph ({len(filtered_1)} links)', color='red', lw=2.6)\n", + "filtered_2.plot(ax=ax, label=f'specified link types and free speed > 20 mph ({len(filtered_2)} links)', color='blue', lw=0.2)\n", + "\n", + "ax.legend()\n", + "plt.savefig('../local_files/vta-beam-network/specified_links_slow_vs_fast.png')" ] }, { @@ -123,7 +208,22 @@ "id": "209d667b-fc15-47f3-a413-144e5112c0a8", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# filtering and plotting\n", + "\n", + "link_type = 'secondary'\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1})\n", + "\n", + "filtered_1 = out_network[(out_network['attributeOrigType'] == link_type) & (out_network['freespeed_mph'] < 20)]\n", + "filtered_2 = out_network[(out_network['attributeOrigType'] == link_type) & (out_network['freespeed_mph'] >= 20)]\n", + "\n", + "filtered_1.plot(ax=ax, label=f'{link_type} and free speed < 20 mph ({len(filtered_1)} links)', color='red', lw=2.6)\n", + "filtered_2.plot(ax=ax, label=f'{link_type} and free speed > 20 mph ({len(filtered_2)} links)', color='blue', lw=0.2)\n", + "\n", + "ax.legend()\n", + "# plt.savefig('../local_files/vta-beam-network/specified_links_slow_vs_fast.png')" + ] } ], "metadata": { diff --git a/jupyter/map_analysis/physsim_network_example.py b/jupyter/map_analysis/physsim_network_example.py index e9026c16fc2..6e407a9ad27 100644 --- a/jupyter/map_analysis/physsim_network_example.py +++ b/jupyter/map_analysis/physsim_network_example.py @@ -19,6 +19,31 @@ pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) +pd.set_option('max_colwidth', None) + + +unspecified_link_type_name = 'unspecified' + +def get_direction(row): + d_x = 'N' + d_y = 'E' + if row['fromLocationX'] > row['toLocationX']: + d_x = 'S' + if row['fromLocationY'] > row['toLocationY']: + d_y = 'W' + return d_x + d_y + +def read_network(file_path): + df = pd.read_csv(file_path) + + df['direction'] = df.apply(get_direction, axis=1) + df['freespeed_mph'] = df['linkFreeSpeed'] * 2.237 + df['attributeOrigType'].fillna(value=unspecified_link_type_name, inplace=True) + + df["attributeOrigId"] = df['attributeOrigId'].astype(str) + df["attributeOrigId"] = pd.to_numeric(df["attributeOrigId"], errors='coerce').fillna(0).astype('Int64') + + return df # In[ ]: @@ -26,13 +51,8 @@ # reading network.csv.gz file from simulation output folder -dtype = { 'attributeOrigId': str } -out_network = pd.read_csv("../beam_root/output/sf-light/sf-light-0.5k__2023-06-15_16-11-49_obh/network.csv.gz", dtype=dtype) -out_network["str_id"] = out_network['attributeOrigId'].astype(str) -out_network["id"] = pd.to_numeric(out_network["str_id"], errors='coerce').fillna(0).astype('Int64') - -# out_network.drop(columns=['str_id', 'attributeOrigId'], inplace=True) -out_network.head(3) +network = read_network("../local_files/sfbay-baseline-20230815-30pct-convergence_beam_year-2018-iteration--1_network.csv.gz") +network.head(2) # In[ ]: @@ -40,7 +60,9 @@ # converting CRS of output network to lat-lon CRS epsg:4326 and creating a geometry for new geopandas data frame -crs_to = CRS.from_epsg(4326) # the lat lon CRS +new_crs = 4326 + +crs_to = CRS.from_epsg(new_crs) # the lat lon CRS crs_from = CRS.from_epsg(26910) # sf crs transformer = Transformer.from_crs(crs_from, crs_to) @@ -50,8 +72,8 @@ def out_row_to_geometry(df_row): mls = MultiLineString([[[from_y, from_x], [to_y, to_x]]]) return mls -out_geometry = out_network.apply(out_row_to_geometry, axis=1) -out_geometry.head(2) +geometry = network.apply(out_row_to_geometry, axis=1) +geometry.head(2) # In[ ]: @@ -59,9 +81,58 @@ def out_row_to_geometry(df_row): # creating geopandas data frame -out_network = gpd.GeoDataFrame(out_network, crs='epsg:4326', geometry=out_geometry) -# out_network.drop(columns=['fromLocationX', 'fromLocationY', 'toLocationX', 'toLocationY'], inplace=True) -out_network.head(2) +geo_df = gpd.GeoDataFrame(network, crs=f'epsg:{new_crs}', geometry=geometry) +display(geo_df.head(2)) + +# saving GeoDataFrame as shape file +# geo_df.to_file("../local_files/sfbay-baseline-20230815-30pct-convergence_beam_year-2018-iteration--1_network.shp") + + +# In[ ]: + + +## how many of each road type are there with speed less than threshold + +speed_field = 'freespeed_mph' +threshold = 20 +moreX_name = f"more than {threshold} mph" +lessX_name = f"less than {threshold} mph" + +grouped_df = network.groupby('attributeOrigType')[[speed_field]].agg( + less_than_X=(speed_field, lambda gr:gr[gr < threshold].count()), + more_than_X=(speed_field, lambda gr:gr[gr >= threshold].count()) +) + +grouped_df.rename({'less_than_X':lessX_name, 'more_than_X':moreX_name}, axis='columns', inplace=True) + +ax = grouped_df.plot(kind='bar', stacked=False, rot=0, figsize=(20,4)) +ax.set_xlabel("") + +# plt.savefig('../local_files/vta-beam-network/link_speed_graph.png') + + +# In[ ]: + + +## description of speeds for each road type + +dfs = [] + +road_types = network['attributeOrigType'].unique() +for road_type in road_types: + filtered_network = network[network['attributeOrigType'] == road_type] + + df = filtered_network[['freespeed_mph']].describe() + df.rename(columns={'freespeed_mph':f'{road_type}'}, inplace=True) + dfs.append(df.transpose()) + +speed_df = pd.concat(dfs) + +speed_description = grouped_df.merge(speed_df[['mean']], left_index=True, right_index=True, how='outer') +speed_description.sort_values('mean', ascending=False, inplace=True) +speed_description.rename(columns={'mean':'mean speed in mph'}, inplace=True) + +speed_description.style.set_properties(**{'width': '150px'}) # In[ ]: @@ -69,31 +140,50 @@ def out_row_to_geometry(df_row): # filtering and plotting -fig, axs = plt.subplots(1, 3, figsize=(13, 5), dpi=100, subplot_kw={'aspect': 1}) -fig.tight_layout(pad=5.0) +fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1}) -filtered_1 = out_network[(out_network['id'] > 4 * 10e7)] -filtered_2 = out_network[out_network['id'] == 0] -print(f"There are {len(filtered_1)} and {len(filtered_2)} lines in filtered network.") +filtered_1 = out_network[(out_network['attributeOrigType'] == unspecified_link_type_name)] +filtered_2 = out_network[(out_network['attributeOrigType'] != unspecified_link_type_name)] -filtered_1.plot(ax=axs[0]) -filtered_2.plot(ax=axs[1]) -filtered_1['linkLength'].hist(ax=axs[2], bins=20, color='green', alpha=0.4, label='filtered') -filtered_2['linkLength'].hist(ax=axs[2], bins=20, color='blue', alpha=0.4, label='with OSM ID 0') +filtered_1.plot(ax=ax, label=f'unspecified ({len(filtered_1)} links)', color='red', lw=0.6) +filtered_2.plot(ax=ax, label=f'the rest of ({len(filtered_2)} links)', color='blue', lw=0.2) -axs[2].legend() +ax.legend() +plt.savefig('../local_files/vta-beam-network/specified_vs_unspecified_links.png') # In[ ]: -# save the network as shape file +# filtering and plotting + +fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1}) + +filtered_1 = out_network[(out_network['attributeOrigType'] != unspecified_link_type_name) & (out_network['freespeed_mph'] < 20)] +filtered_2 = out_network[(out_network['attributeOrigType'] != unspecified_link_type_name) & (out_network['freespeed_mph'] >= 20)] -out_network.to_file("output_network.shp") +filtered_1.plot(ax=ax, label=f'specified link types and free speed < 20 mph ({len(filtered_1)} links)', color='red', lw=2.6) +filtered_2.plot(ax=ax, label=f'specified link types and free speed > 20 mph ({len(filtered_2)} links)', color='blue', lw=0.2) + +ax.legend() +plt.savefig('../local_files/vta-beam-network/specified_links_slow_vs_fast.png') # In[ ]: +# filtering and plotting + +link_type = 'secondary' + +fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1}) + +filtered_1 = out_network[(out_network['attributeOrigType'] == link_type) & (out_network['freespeed_mph'] < 20)] +filtered_2 = out_network[(out_network['attributeOrigType'] == link_type) & (out_network['freespeed_mph'] >= 20)] + +filtered_1.plot(ax=ax, label=f'{link_type} and free speed < 20 mph ({len(filtered_1)} links)', color='red', lw=2.6) +filtered_2.plot(ax=ax, label=f'{link_type} and free speed > 20 mph ({len(filtered_2)} links)', color='blue', lw=0.2) +ax.legend() +# plt.savefig('../local_files/vta-beam-network/specified_links_slow_vs_fast.png') From 6cf902f0686b0859ecab0fe05f4519e01e70b910 Mon Sep 17 00:00:00 2001 From: Nikolay Ilyin Date: Tue, 5 Sep 2023 18:45:06 +0300 Subject: [PATCH 4/5] analysis of linkstats and network files as a whole --- jupyter/map_analysis/network_linkstats.ipynb | 435 +++++++++++++++---- jupyter/map_analysis/network_linkstats.py | 387 ++++++++++++++--- 2 files changed, 654 insertions(+), 168 deletions(-) diff --git a/jupyter/map_analysis/network_linkstats.ipynb b/jupyter/map_analysis/network_linkstats.ipynb index e5152bcf6c0..3d712272d0d 100644 --- a/jupyter/map_analysis/network_linkstats.ipynb +++ b/jupyter/map_analysis/network_linkstats.ipynb @@ -10,12 +10,103 @@ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", - "import math\n", + "import geopandas as gpd\n", + "\n", + "from pyproj import CRS, Transformer\n", + "from geopandas import GeoDataFrame\n", + "from shapely.geometry import Point\n", + "from shapely.geometry.multilinestring import MultiLineString\n", + "from IPython.display import clear_output\n", "\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_columns', 500)\n", "pd.set_option('display.width', 1000)\n", - "pd.set_option('max_colwidth', None)" + "pd.set_option('max_colwidth', None)\n", + "\n", + "\n", + "unspecified_link_type_name = 'unspecified'\n", + "\n", + "\n", + "def get_direction(row):\n", + " d_x = 'North'\n", + " d_y = 'East'\n", + " if row['fromLocationX'] > row['toLocationX']:\n", + " d_x = 'South'\n", + " if row['fromLocationY'] > row['toLocationY']:\n", + " d_y = 'West'\n", + " return f'{d_x} - {d_y}'\n", + "\n", + "\n", + "def read_network(file_path):\n", + " df = pd.read_csv(file_path)\n", + "\n", + " df['attributeOrigType'].fillna(value=unspecified_link_type_name, inplace=True)\n", + "\n", + " df[\"attributeOrigId\"] = df['attributeOrigId'].astype(str)\n", + " df[\"attributeOrigId\"] = pd.to_numeric(df[\"attributeOrigId\"], errors='coerce').fillna(0).astype('Int64')\n", + " df['direction'] = df.apply(get_direction, axis=1)\n", + " \n", + " return df\n", + "\n", + "\n", + "def read_linkstats(file_path):\n", + " df = pd.read_csv(file_path)\n", + "\n", + " df['speed_ms'] = df['length'] / df['traveltime']\n", + " df['speed_mph'] = df['speed_ms'] * 2.237\n", + " df['freespeed_mph'] = df['freespeed'] * 2.237\n", + "\n", + " df['speed_delta_mph'] = df['freespeed_mph'] - df['speed_mph']\n", + " \n", + " return df\n", + "\n", + "\n", + "# sf-bay area CRS is 26910\n", + "def read_geo_network(network_path, network_crs):\n", + " n_df = read_network(network_path)\n", + " \n", + " crs_to_id = 4326 # the LAT LON CRS\n", + " crs_to = CRS.from_epsg(crs_to_id)\n", + " crs_from = CRS.from_epsg(network_crs)\n", + " transformer = Transformer.from_crs(crs_from, crs_to)\n", + "\n", + " def out_row_to_geometry(df_row):\n", + " (from_x, from_y) = transformer.transform(df_row['fromLocationX'], df_row['fromLocationY'])\n", + " (to_x, to_y) = transformer.transform(df_row['toLocationX'], df_row['toLocationY'])\n", + " mls = MultiLineString([[[from_y, from_x], [to_y, to_x]]])\n", + " return mls\n", + "\n", + " geometry = n_df.apply(out_row_to_geometry, axis=1)\n", + " geo_df = gpd.GeoDataFrame(n_df, crs=f'epsg:{crs_to_id}', geometry=geometry)\n", + " return geo_df\n", + " \n", + "\n", + "def read_full_network(linkstats_path, network_path):\n", + " n_df = read_network(network_path)\n", + " l_df = read_linkstats(linkstats_path)\n", + " \n", + " # simple sanity check\n", + " links_ids_1 = set(l_df['link'].unique())\n", + " links_ids_2 = set(n_df['linkId'].unique())\n", + " if (len(links_ids_1 - links_ids_2) != 0):\n", + " print(f\"SOMETHING IS WRONG!! Links are not the same from both files!\")\n", + "\n", + " full_df = l_df.merge(n_df, left_on='link', right_on='linkId', how='outer')\n", + " return full_df\n", + "\n", + "\n", + "## to do sanity check after merging linkstats and network\n", + "## to ensure that the linkstats was produced from the network\n", + "def do_sanity_check(full_df):\n", + "\n", + " def row_sanity_check(row):\n", + " return row['to'] == row['toNodeId'] \\\n", + " and row['from'] == row['fromNodeId'] \\\n", + " and row['freespeed'] == row['linkFreeSpeed']\n", + "\n", + " return full_df.apply(lambda r: 'equal' if row_sanity_check(r) else 'NOT equal', axis=1).value_counts()\n", + "\n", + "'init complete'" ] }, { @@ -25,168 +116,286 @@ "metadata": {}, "outputs": [], "source": [ - "## reading files\n", + "## reading network file\n", + "# network = read_network(\"../local_files/vta-beam-network/network.csv.gz\")\n", + "# print(f\"there are {len(network)} records\")\n", + "# display(network.head(2))\n", "\n", - "network = pd.read_csv(\"../local_files/network.1.csv.gz\")\n", - "network['attributeOrigType'].fillna(value='UNKNOWN', inplace=True)\n", + "## reading linkstats file\n", + "# linkstats = read_linkstats(\"../local_files/vta-beam-network/1.0.linkstats.csv.gz\")\n", + "# print(f\"there are {len(linkstats)} records\")\n", + "# display(linkstats.head(2))\n", "\n", - "linkstats = pd.read_csv(\"../local_files/linkstats.1.csv.gz\")\n", + "# reading full network\n", + "full_network = read_full_network(\n", + " linkstats_path = \"../local_files/vta-beam-network/1.0.linkstats.csv.gz\", \n", + " network_path = \"../local_files/vta-beam-network/network.csv.gz\",\n", + ")\n", "\n", - "print(network.shape, linkstats.shape)\n", - "# network['attributeOrigType'].value_counts()" + "display(full_network.head(2))" ] }, { "cell_type": "code", "execution_count": null, - "id": "4a1443c5-9aef-407b-a133-153b133046f8", + "id": "651a3e08-e221-41ef-910a-f867c86af2aa", "metadata": {}, "outputs": [], "source": [ - "network.head(2)" + "def avg_speed_vol(data):\n", + " if data['volume'].sum() > 0:\n", + " return np.average(data['speed_mph'], weights=data['volume'])\n", + " return np.average(data['speed_mph'])\n", + "\n", + "def total_volume(data):\n", + " return np.sum(data['volume'])\n", + "\n", + "def average_freespeed(data):\n", + " return np.average(data['freespeed_mph'])\n", + "\n", + "def speeds_analysis(data):\n", + " return pd.Series({\n", + " \"average weighted speed\" : avg_speed_vol(data),\n", + " \"total volume\": total_volume(data),\n", + " \"average freespeed\": average_freespeed(data),\n", + " \"5 percentile of speed\": np.percentile(data['speed_mph'], 5),\n", + " \"95 percentile of speed\": np.percentile(data['speed_mph'], 95),\n", + " })\n", + "\n", + "\n", + "df = full_network #.head(100000)\n", + "df_g = df.groupby(['hour','attributeOrigType']).apply(speeds_analysis).reset_index()\n", + "df_g['total volume weighted'] = df_g['total volume'] / df_g['total volume'].max()\n", + "\n", + "road_types_df = df_g.groupby('attributeOrigType')['total volume'].sum().reset_index().sort_values('total volume', ascending=False)\n", + "road_types_ordered_by_volume = list(road_types_df['attributeOrigType'])\n", + "\n", + "hours = df_g['hour'].unique()\n", + "\n", + "plt.figure(figsize=(15,7))\n", + "plt.ylim(0, 70)\n", + "\n", + "N_road_types = 6\n", + "\n", + "for road_type in road_types_ordered_by_volume[:6]:\n", + " df_g_f = df_g[df_g['attributeOrigType'] == road_type]\n", + " avg_speed = df_g_f['average weighted speed']\n", + " \n", + " size_multiplier = df_g_f['total volume weighted'].sum() * 20\n", + " size = df_g_f['total volume weighted'] * size_multiplier\n", + " \n", + " plt.scatter(x=hours, y=avg_speed, s=size, label=f\"avg speed mph [{road_type}]\", alpha=0.6)\n", + " plt.plot(hours, avg_speed, alpha=0.2)\n", + "\n", + "plt.legend()\n", + "plt.title(f\"Average speed for road types weighted by volume (top {N_road_types} road types by volume)\")\n", + "plt.show()" ] }, { "cell_type": "code", "execution_count": null, - "id": "fab21b08-a08c-4982-869b-e62fee36f3e1", + "id": "82d6d8f1-96fa-4721-961c-240546fe05e5", "metadata": {}, "outputs": [], "source": [ - "linkstats.head(2)" + "def avg_speed_vol(data):\n", + " if data['volume'].sum() > 0:\n", + " return np.average(data['speed_mph'], weights=data['volume'])\n", + " return np.average(data['speed_mph'])\n", + "\n", + "def total_volume(data):\n", + " return np.sum(data['volume'])\n", + "\n", + "def average_freespeed(data):\n", + " return np.average(data['freespeed_mph'])\n", + "\n", + "def speeds_analysis(data):\n", + " return pd.Series({\n", + " \"average weighted speed\" : avg_speed_vol(data),\n", + " \"total volume\": total_volume(data),\n", + " \"average freespeed\": average_freespeed(data),\n", + " \"5 percentile of speed\": np.percentile(data['speed_mph'], 5),\n", + " \"95 percentile of speed\": np.percentile(data['speed_mph'], 95),\n", + " })\n", + "\n", + "\n", + "links_with_most_volume = set(full_network[full_network['volume'] > np.percentile(full_network['volume'], 99.5)]['link'].unique())\n", + "df = full_network[full_network['link'].isin(links_with_most_volume)]\n", + "\n", + "df_g = df.groupby(['hour','attributeOrigType']).apply(speeds_analysis).reset_index()\n", + "df_g['total volume weighted'] = df_g['total volume'] / df_g['total volume'].max()\n", + "\n", + "road_types_df = df_g.groupby('attributeOrigType')['total volume'].sum().reset_index().sort_values('total volume', ascending=False)\n", + "# display(road_types_df)\n", + "\n", + "road_types_ordered_by_volume = list(road_types_df['attributeOrigType'])\n", + "\n", + "hours = df_g['hour'].unique()\n", + "\n", + "plt.figure(figsize=(15,4))\n", + "plt.ylim(0, 70)\n", + "\n", + "N_road_types = 3\n", + "\n", + "for road_type in road_types_ordered_by_volume[:N_road_types]:\n", + " df_g_f = df_g[df_g['attributeOrigType'] == road_type]\n", + " avg_speed = df_g_f['average weighted speed']\n", + " min_speed = df_g_f[\"5 percentile of speed\"]\n", + " \n", + " size_multiplier = df_g_f['total volume weighted'].sum() * 20\n", + " size = df_g_f['total volume weighted'] * size_multiplier\n", + " \n", + " scatterplot = plt.scatter(x=hours, y=avg_speed, s=size, label=f\"avg speed mph [{road_type}, volume {df_g_f['total volume'].sum()}]\", alpha=0.4)\n", + " col = scatterplot.get_facecolors()[0].tolist()\n", + " plt.plot(hours, avg_speed, alpha=0.2, c=col)\n", + " plt.plot(hours, min_speed, alpha=0.2, label=f\"5 percentile for speed mph [{road_type}]\", c=col)\n", + "\n", + "plt.legend()\n", + "plt.title(f\"Average speed for top 0.5% links by volume, weighted by volume (top {N_road_types} road types by volume)\")\n", + "plt.show()" ] }, { "cell_type": "code", "execution_count": null, - "id": "1f42656e-aba7-495f-a029-480a27164316", + "id": "cca66295-b323-4498-8f91-cd7929bbfd6f", "metadata": {}, "outputs": [], "source": [ - "## merging together based on link Id \n", - "## assuming files are for the same network and has the same number of links\n", - "\n", - "full_df = linkstats.merge(network, left_on='link', right_on='linkId', how='outer')\n", - "\n", - "full_df.head(2)" + "# sf-bay area CRS is 26910\n", + "geo_network = read_geo_network(\n", + " network_path = \"../local_files/vta-beam-network/network.csv.gz\", \n", + " network_crs=26910\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "2b161ac7-73da-4524-a792-9950d9a914dd", + "id": "40c32b34-160b-4757-9861-1d45eaf829f8", "metadata": {}, "outputs": [], "source": [ - "## doing sanity check\n", + "# filtering and plotting\n", + "\n", + "links_with_most_volume = set(full_network[full_network['volume'] > np.percentile(full_network['volume'], 99.5)]['link'].unique())\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1})\n", "\n", - "def row_sanity_check(row):\n", - " return row['to'] == row['toNodeId'] \\\n", - " and row['from'] == row['fromNodeId'] \\\n", - " and row['freespeed'] == row['linkFreeSpeed']\n", + "filtered_1 = geo_network[(geo_network['linkId'].isin(links_with_most_volume))]\n", + "filtered_2 = geo_network[(~geo_network['linkId'].isin(links_with_most_volume))]\n", "\n", - "print(f\"the shape of df is {full_df.shape}\")\n", - "full_df.apply(lambda r: 'equal' if row_sanity_check(r) else 'NOT equal', axis=1).value_counts()" + "filtered_1.plot(ax=ax, label=f'({len(filtered_1)} links) with 0.5% top volume', color='red', lw=1.2)\n", + "filtered_2.plot(ax=ax, label=f'the rest of ({len(filtered_2)} links)', color='blue', lw=0.2)\n", + "\n", + "ax.legend()\n", + "\n", + "# base_file_name = '../local_files/vta-beam-network/links_with_most_volume'\n", + "# network[(network['linkId'].isin(slow_links_ids))].to_csv(base_file_name + \".csv.gz\")\n", + "# filtered_1.to_csv(base_file_name + \".geo.csv.gz\")\n", + "# filtered_1.to_file(base_file_name + \".shp\")\n", + "# plt.savefig(base_file_name + \".png\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "0f3ce36d-4e27-444d-addf-d53b1ed3a116", + "id": "8c3a9e75-d599-488e-acd8-9a9cc6269e7d", "metadata": {}, "outputs": [], "source": [ - "## calculating speed, free speed, speed in km/h, speed in mph\n", + "## how many of each road type are there with speed less than threshold\n", "\n", - "full_df['speed'] = full_df['length'] / full_df['traveltime']\n", + "speed_field = 'speed_mph'\n", + "threshold = 20\n", + "moreX_name = f\"more than {threshold} mph\"\n", + "lessX_name = f\"less than {threshold} mph\"\n", "\n", - "full_df['freespeed_km_h'] = full_df['freespeed'] * 3.6\n", - "full_df['speed_km_h'] = full_df['speed'] * 3.6\n", + "grouped_df = full_df.groupby('attributeOrigType')[[speed_field]].agg(\n", + " less_than_X=(speed_field, lambda gr:gr[gr < threshold].count()),\n", + " more_than_X=(speed_field, lambda gr:gr[gr >= threshold].count())\n", + ")\n", + "\n", + "grouped_df.rename({'less_than_X':lessX_name, 'more_than_X':moreX_name}, axis='columns', inplace=True)\n", "\n", - "full_df['freespeed_mph'] = full_df['freespeed'] * 2.237\n", - "full_df['speed_mph'] = full_df['speed'] * 2.237\n", + "ax = grouped_df.plot(kind='bar', stacked=False, rot=0, figsize=(20,4))\n", + "ax.set_xlabel(\"\")\n", "\n", - "full_df.head(2)" + "plt.savefig('../local_files/vta-beam-network/link_speed_graph.png')" ] }, { "cell_type": "code", "execution_count": null, - "id": "6877eef2-4d44-4dc8-9b4c-ab3f74037d57", + "id": "e13ef8bd-5b13-483c-8b9e-74dcc524e6c9", "metadata": {}, "outputs": [], "source": [ - "## speed distribution\n", + "# an example of analysis of speed of selected links subset\n", + "\n", + "east_bound_left = set([59118,80745]) # NE\n", + "west_bound_left = set([59119,80744]) # SW\n", + "\n", + "east_bound_right = set([20062,34374]) # NE\n", + "west_bound_right = set([20063,34375]) # SW\n", "\n", - "max_speed = int(full_df['speed_km_h'].max())\n", - "full_df['speed_km_h'].hist(bins=max_speed, figsize=(15,2))\n", - "print(f'max speed and number of buckets is {max_speed}')" + "display(network[network['linkId'].isin(east_bound_left)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']])\n", + "display(network[network['linkId'].isin(west_bound_left)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']])\n", + "display(network[network['linkId'].isin(east_bound_right)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']])\n", + "display(network[network['linkId'].isin(west_bound_right)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']])" ] }, { "cell_type": "code", "execution_count": null, - "id": "037b8214-2bcb-4c14-8e76-050cc4250635", + "id": "add2697e-ab23-4fea-aac6-596c32f1f1e9", "metadata": {}, "outputs": [], "source": [ - "## description of speeds for each road type\n", + "# an example of analysis of speed of selected links subset\n", + "# here are two bridges analysis\n", "\n", - "# dfs = []\n", - "dfs_free = []\n", + "left_road = set([59118,80745,59119,80744])\n", + "right_road = set([20062,34374,20063,34375]) \n", "\n", - "road_types = network['attributeOrigType'].unique()\n", - "for road_type in road_types:\n", - " filtered_network = full_df[full_df['attributeOrigType'] == road_type]\n", - " \n", - " # df = filtered_network[['speed']].describe()\n", - " # df.rename(columns={'speed':f'{road_type} speed'}, inplace=True)\n", - " # dfs.append(df.transpose())\n", + "fig,axs = plt.subplots(1,2,figsize=(18,4))\n", + "fig.subplots_adjust(wspace=0.1, hspace=0.4)\n", + "\n", + "def plot_aggregated_speed(df, set_of_ids, label, axis, save_to_csv=False):\n", + " df_filtered = df[(df['link'].isin(set_of_ids))]\n", + " directions = df_filtered['direction'].unique()\n", " \n", - " df = filtered_network[['freespeed']].describe()\n", - " df.rename(columns={'freespeed':f'{road_type} free speed'}, inplace=True)\n", - " dfs_free.append(df.transpose())\n", + " function_name = 'mean'\n", + " for direction in sorted(directions):\n", + " df_filtered_direction = df_filtered[df_filtered['direction'] == direction]\n", + " df_grouped = df_filtered_direction.groupby('hour')[['speed_mph']].agg(function_name).reset_index()\n", + " df_grouped.plot(x='hour', y='speed_mph', marker='o', linewidth=4, linestyle='-', label=f'{function_name} speed {label} [{direction}]', ax=axis, alpha=0.5)\n", " \n", - "# speed_df = pd.concat(dfs)\n", - "# display(speed_df)\n", + " if save_to_csv:\n", + " df_filtered.to_csv(f'../local_files/vta-beam-network/linkstats_1_filtered_links_{label}.csv'.replace(' ', '.'))\n", "\n", - "free_speed_df = pd.concat(dfs_free)\n", - "display(free_speed_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8c3a9e75-d599-488e-acd8-9a9cc6269e7d", - "metadata": {}, - "outputs": [], - "source": [ - "## how many of each road type are there with speed less than threshold\n", - "\n", - "grouped_df = full_df.groupby('attributeOrigType')[['linkFreeSpeed']].agg(\n", - " less_than_20=('linkFreeSpeed', lambda gr:gr[gr < 20].count()),\n", - " more_than_20=('linkFreeSpeed', lambda gr:gr[gr >= 20].count())\n", - ")\n", + " \n", + "plot_aggregated_speed(full_network, left_road, 'left road', axs[0])\n", + "plot_aggregated_speed(full_network, right_road, 'right road', axs[1])\n", "\n", - "grouped_df.rename({'less_than_20':\"less than 20\", 'more_than_20':\"more than 20\"}, axis='columns', inplace=True)\n", + "fig.suptitle(\"Full population simulation (linkstats #1 analysis)\")\n", "\n", - "ax = grouped_df.plot(kind='bar', stacked=True, rot=20, figsize=(20,4))\n", + "for ax in axs:\n", + " ax.set_ylim(bottom=0)\n", + " ax.legend(loc='lower right')\n", + " ax.set_ylabel('speed mph')\n", "\n", - "# if numbers are required on top of bars:\n", - "#\n", - "# for (container, pdd, color) in zip(ax.containers, [0,10], ['blue', 'orange']):\n", - "# ax.bar_label(container, padding=pdd, color=color)" + "# plt.savefig(f'../local_files/vta-beam-network/linkstats_1_filtered_links.png')" ] }, { "cell_type": "code", "execution_count": null, - "id": "acebf0fc-cc25-4e83-8576-b1ed78b30a33", + "id": "31837f31-0b03-4289-a7c8-6003c99a2a68", "metadata": {}, "outputs": [], - "source": [ - "grouped_df.sort_values('more than 20')" - ] + "source": [] }, { "cell_type": "code", @@ -206,11 +415,11 @@ "\n", "linkId_to_values = {}\n", "\n", - "links_with_speed_less_than_threshold = set(linkstats2[linkstats2['freespeed'] < threshold_ms]['link'].unique())\n", + "links_with_speed_less_than_threshold = set(full_df[full_df['freespeed'] < threshold_ms]['link'].unique())\n", "print(f\"there are {len(links_with_speed_less_than_threshold)} links with free speed less than {threshold_ms} meters per second.\")\n", "\n", "selected_columns = ['linkId','linkCapacity','linkFreeSpeed','linkLength','numberOfLanes','attributeOrigType']\n", - "df = network2[network2['linkId'].isin(links_with_speed_less_than_threshold)][selected_columns]\n", + "df = full_df[full_df['linkId'].isin(links_with_speed_less_than_threshold)][selected_columns]\n", "df.rename(columns={'linkId':'link_id', \n", " 'linkCapacity':'capacity', \n", " 'linkFreeSpeed':'free_speed', \n", @@ -245,7 +454,15 @@ { "cell_type": "code", "execution_count": null, - "id": "ba2cdeeb-20c5-4d55-914d-ebc4a0ec9b21", + "id": "2c85d25d-92a6-452e-bb5b-304036089f38", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5242850b-04d1-4e36-8e2c-bc178e92c8a3", "metadata": {}, "outputs": [], "source": [] @@ -253,7 +470,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cc09b33c-5a14-41f4-b3fe-72eb5fbc28c7", + "id": "266491e3-52db-4542-8a11-fa3181b9b379", "metadata": {}, "outputs": [], "source": [] @@ -261,25 +478,51 @@ { "cell_type": "code", "execution_count": null, - "id": "9c8906ff-8e9c-418d-813c-2138fdd7ccfc", + "id": "1675d562-dcfe-48dd-b4c9-2a1541cbe6ed", "metadata": {}, "outputs": [], "source": [ - "dd = {\n", - " 'l1' : [1,2,3] * 10,\n", - " 'l2' : list(range(12,12 + 15)) * 2\n", - "}\n", + "# group by and apply example\n", "\n", - "df = pd.DataFrame.from_dict(dd)\n", - "display(df.head())\n", + "import pandas as pd\n", + "\n", + "df = pd.DataFrame({'x':[2, 3, -10, -10],\n", + " 'y':[10, 13, 20, 30],\n", + " 'id':['a', 'a', 'b', 'b']})\n", + "\n", + "def mindist(data):\n", + " return min(data['y'] - data['x'])\n", + "\n", + "def maxdist(data):\n", + " return max(data['y'] - data['x'])\n", + "\n", + "def fun(data):\n", + " return pd.Series({\"maxdist\":maxdist(data),\n", + " \"mindist\":mindist(data)})\n", "\n", - "df.groupby('l1')[['l2']].agg(l3=('l2', lambda gr: gr[gr >= 16].count()), l4=('l2', lambda gr: gr[gr < 16].count()) )" + "df.groupby('id').apply(fun)" ] }, { "cell_type": "code", "execution_count": null, - "id": "df98f6c0-d777-45b5-9172-faf0410d2ac6", + "id": "3ea18a05-39a3-41ca-a884-30eabd465c97", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac8f73db-054d-4013-89e3-980095946bac", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98c3b0d3-3b73-47f1-a640-353fe563f7d0", "metadata": {}, "outputs": [], "source": [] diff --git a/jupyter/map_analysis/network_linkstats.py b/jupyter/map_analysis/network_linkstats.py index 8a709fcaafe..a02b1a26f30 100644 --- a/jupyter/map_analysis/network_linkstats.py +++ b/jupyter/map_analysis/network_linkstats.py @@ -7,7 +7,13 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt -import math +import geopandas as gpd + +from pyproj import CRS, Transformer +from geopandas import GeoDataFrame +from shapely.geometry import Point +from shapely.geometry.multilinestring import MultiLineString +from IPython.display import clear_output pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 500) @@ -15,108 +21,258 @@ pd.set_option('max_colwidth', None) -# In[ ]: +unspecified_link_type_name = 'unspecified' -## reading files +def get_direction(row): + d_x = 'North' + d_y = 'East' + if row['fromLocationX'] > row['toLocationX']: + d_x = 'South' + if row['fromLocationY'] > row['toLocationY']: + d_y = 'West' + return f'{d_x} - {d_y}' -network = pd.read_csv("../local_files/network.1.csv.gz") -network['attributeOrigType'].fillna(value='UNKNOWN', inplace=True) -linkstats = pd.read_csv("../local_files/linkstats.1.csv.gz") +def read_network(file_path): + df = pd.read_csv(file_path) -print(network.shape, linkstats.shape) -# network['attributeOrigType'].value_counts() + df['attributeOrigType'].fillna(value=unspecified_link_type_name, inplace=True) + df["attributeOrigId"] = df['attributeOrigId'].astype(str) + df["attributeOrigId"] = pd.to_numeric(df["attributeOrigId"], errors='coerce').fillna(0).astype('Int64') + df['direction'] = df.apply(get_direction, axis=1) + + return df -# In[ ]: +def read_linkstats(file_path): + df = pd.read_csv(file_path) -network.head(2) + df['speed_ms'] = df['length'] / df['traveltime'] + df['speed_mph'] = df['speed_ms'] * 2.237 + df['freespeed_mph'] = df['freespeed'] * 2.237 + df['speed_delta_mph'] = df['freespeed_mph'] - df['speed_mph'] + + return df -# In[ ]: +# sf-bay area CRS is 26910 +def read_geo_network(network_path, network_crs): + n_df = read_network(network_path) + + crs_to_id = 4326 # the LAT LON CRS + crs_to = CRS.from_epsg(crs_to_id) + crs_from = CRS.from_epsg(network_crs) + transformer = Transformer.from_crs(crs_from, crs_to) + + def out_row_to_geometry(df_row): + (from_x, from_y) = transformer.transform(df_row['fromLocationX'], df_row['fromLocationY']) + (to_x, to_y) = transformer.transform(df_row['toLocationX'], df_row['toLocationY']) + mls = MultiLineString([[[from_y, from_x], [to_y, to_x]]]) + return mls + + geometry = n_df.apply(out_row_to_geometry, axis=1) + geo_df = gpd.GeoDataFrame(n_df, crs=f'epsg:{crs_to_id}', geometry=geometry) + return geo_df + -linkstats.head(2) +def read_full_network(linkstats_path, network_path): + n_df = read_network(network_path) + l_df = read_linkstats(linkstats_path) + + # simple sanity check + links_ids_1 = set(l_df['link'].unique()) + links_ids_2 = set(n_df['linkId'].unique()) + if (len(links_ids_1 - links_ids_2) != 0): + print(f"SOMETHING IS WRONG!! Links are not the same from both files!") + full_df = l_df.merge(n_df, left_on='link', right_on='linkId', how='outer') + return full_df -# In[ ]: +## to do sanity check after merging linkstats and network +## to ensure that the linkstats was produced from the network +def do_sanity_check(full_df): -## merging together based on link Id -## assuming files are for the same network and has the same number of links + def row_sanity_check(row): + return row['to'] == row['toNodeId'] \ + and row['from'] == row['fromNodeId'] \ + and row['freespeed'] == row['linkFreeSpeed'] -full_df = linkstats.merge(network, left_on='link', right_on='linkId', how='outer') + return full_df.apply(lambda r: 'equal' if row_sanity_check(r) else 'NOT equal', axis=1).value_counts() -full_df.head(2) +'init complete' # In[ ]: -## doing sanity check +## reading network file +# network = read_network("../local_files/vta-beam-network/network.csv.gz") +# print(f"there are {len(network)} records") +# display(network.head(2)) + +## reading linkstats file +# linkstats = read_linkstats("../local_files/vta-beam-network/1.0.linkstats.csv.gz") +# print(f"there are {len(linkstats)} records") +# display(linkstats.head(2)) -def row_sanity_check(row): - return row['to'] == row['toNodeId'] \ - and row['from'] == row['fromNodeId'] \ - and row['freespeed'] == row['linkFreeSpeed'] +# reading full network +full_network = read_full_network( + linkstats_path = "../local_files/vta-beam-network/1.0.linkstats.csv.gz", + network_path = "../local_files/vta-beam-network/network.csv.gz", +) -print(f"the shape of df is {full_df.shape}") -full_df.apply(lambda r: 'equal' if row_sanity_check(r) else 'NOT equal', axis=1).value_counts() +display(full_network.head(2)) # In[ ]: -## calculating speed, free speed, speed in km/h, speed in mph +def avg_speed_vol(data): + if data['volume'].sum() > 0: + return np.average(data['speed_mph'], weights=data['volume']) + return np.average(data['speed_mph']) -full_df['speed'] = full_df['length'] / full_df['traveltime'] +def total_volume(data): + return np.sum(data['volume']) -full_df['freespeed_km_h'] = full_df['freespeed'] * 3.6 -full_df['speed_km_h'] = full_df['speed'] * 3.6 +def average_freespeed(data): + return np.average(data['freespeed_mph']) -full_df['freespeed_mph'] = full_df['freespeed'] * 2.237 -full_df['speed_mph'] = full_df['speed'] * 2.237 +def speeds_analysis(data): + return pd.Series({ + "average weighted speed" : avg_speed_vol(data), + "total volume": total_volume(data), + "average freespeed": average_freespeed(data), + "5 percentile of speed": np.percentile(data['speed_mph'], 5), + "95 percentile of speed": np.percentile(data['speed_mph'], 95), + }) -full_df.head(2) +df = full_network #.head(100000) +df_g = df.groupby(['hour','attributeOrigType']).apply(speeds_analysis).reset_index() +df_g['total volume weighted'] = df_g['total volume'] / df_g['total volume'].max() -# In[ ]: +road_types_df = df_g.groupby('attributeOrigType')['total volume'].sum().reset_index().sort_values('total volume', ascending=False) +road_types_ordered_by_volume = list(road_types_df['attributeOrigType']) + +hours = df_g['hour'].unique() + +plt.figure(figsize=(15,7)) +plt.ylim(0, 70) +N_road_types = 6 -## speed distribution +for road_type in road_types_ordered_by_volume[:6]: + df_g_f = df_g[df_g['attributeOrigType'] == road_type] + avg_speed = df_g_f['average weighted speed'] + + size_multiplier = df_g_f['total volume weighted'].sum() * 20 + size = df_g_f['total volume weighted'] * size_multiplier + + plt.scatter(x=hours, y=avg_speed, s=size, label=f"avg speed mph [{road_type}]", alpha=0.6) + plt.plot(hours, avg_speed, alpha=0.2) -max_speed = int(full_df['speed_km_h'].max()) -full_df['speed_km_h'].hist(bins=max_speed, figsize=(15,2)) -print(f'max speed and number of buckets is {max_speed}') +plt.legend() +plt.title(f"Average speed for road types weighted by volume (top {N_road_types} road types by volume)") +plt.show() # In[ ]: -## description of speeds for each road type +def avg_speed_vol(data): + if data['volume'].sum() > 0: + return np.average(data['speed_mph'], weights=data['volume']) + return np.average(data['speed_mph']) -# dfs = [] -dfs_free = [] +def total_volume(data): + return np.sum(data['volume']) -road_types = network['attributeOrigType'].unique() -for road_type in road_types: - filtered_network = full_df[full_df['attributeOrigType'] == road_type] - - # df = filtered_network[['speed']].describe() - # df.rename(columns={'speed':f'{road_type} speed'}, inplace=True) - # dfs.append(df.transpose()) +def average_freespeed(data): + return np.average(data['freespeed_mph']) + +def speeds_analysis(data): + return pd.Series({ + "average weighted speed" : avg_speed_vol(data), + "total volume": total_volume(data), + "average freespeed": average_freespeed(data), + "5 percentile of speed": np.percentile(data['speed_mph'], 5), + "95 percentile of speed": np.percentile(data['speed_mph'], 95), + }) + + +links_with_most_volume = set(full_network[full_network['volume'] > np.percentile(full_network['volume'], 99.5)]['link'].unique()) +df = full_network[full_network['link'].isin(links_with_most_volume)] + +df_g = df.groupby(['hour','attributeOrigType']).apply(speeds_analysis).reset_index() +df_g['total volume weighted'] = df_g['total volume'] / df_g['total volume'].max() + +road_types_df = df_g.groupby('attributeOrigType')['total volume'].sum().reset_index().sort_values('total volume', ascending=False) +# display(road_types_df) + +road_types_ordered_by_volume = list(road_types_df['attributeOrigType']) + +hours = df_g['hour'].unique() + +plt.figure(figsize=(15,4)) +plt.ylim(0, 70) + +N_road_types = 3 + +for road_type in road_types_ordered_by_volume[:N_road_types]: + df_g_f = df_g[df_g['attributeOrigType'] == road_type] + avg_speed = df_g_f['average weighted speed'] + min_speed = df_g_f["5 percentile of speed"] - df = filtered_network[['freespeed']].describe() - df.rename(columns={'freespeed':f'{road_type} free speed'}, inplace=True) - dfs_free.append(df.transpose()) + size_multiplier = df_g_f['total volume weighted'].sum() * 20 + size = df_g_f['total volume weighted'] * size_multiplier -# speed_df = pd.concat(dfs) -# display(speed_df) + scatterplot = plt.scatter(x=hours, y=avg_speed, s=size, label=f"avg speed mph [{road_type}, volume {df_g_f['total volume'].sum()}]", alpha=0.4) + col = scatterplot.get_facecolors()[0].tolist() + plt.plot(hours, avg_speed, alpha=0.2, c=col) + plt.plot(hours, min_speed, alpha=0.2, label=f"5 percentile for speed mph [{road_type}]", c=col) + +plt.legend() +plt.title(f"Average speed for top 0.5% links by volume, weighted by volume (top {N_road_types} road types by volume)") +plt.show() + -free_speed_df = pd.concat(dfs_free) -display(free_speed_df) +# In[ ]: + + +# sf-bay area CRS is 26910 +geo_network = read_geo_network( + network_path = "../local_files/vta-beam-network/network.csv.gz", + network_crs=26910 +) + + +# In[ ]: + + +# filtering and plotting + +links_with_most_volume = set(full_network[full_network['volume'] > np.percentile(full_network['volume'], 99.5)]['link'].unique()) + +fig, ax = plt.subplots(1, 1, figsize=(20, 20), dpi=300, subplot_kw={'aspect': 1}) + +filtered_1 = geo_network[(geo_network['linkId'].isin(links_with_most_volume))] +filtered_2 = geo_network[(~geo_network['linkId'].isin(links_with_most_volume))] + +filtered_1.plot(ax=ax, label=f'({len(filtered_1)} links) with 0.5% top volume', color='red', lw=1.2) +filtered_2.plot(ax=ax, label=f'the rest of ({len(filtered_2)} links)', color='blue', lw=0.2) + +ax.legend() + +# base_file_name = '../local_files/vta-beam-network/links_with_most_volume' +# network[(network['linkId'].isin(slow_links_ids))].to_csv(base_file_name + ".csv.gz") +# filtered_1.to_csv(base_file_name + ".geo.csv.gz") +# filtered_1.to_file(base_file_name + ".shp") +# plt.savefig(base_file_name + ".png") # In[ ]: @@ -124,25 +280,84 @@ def row_sanity_check(row): ## how many of each road type are there with speed less than threshold -grouped_df = full_df.groupby('attributeOrigType')[['linkFreeSpeed']].agg( - less_than_20=('linkFreeSpeed', lambda gr:gr[gr < 20].count()), - more_than_20=('linkFreeSpeed', lambda gr:gr[gr >= 20].count()) +speed_field = 'speed_mph' +threshold = 20 +moreX_name = f"more than {threshold} mph" +lessX_name = f"less than {threshold} mph" + +grouped_df = full_df.groupby('attributeOrigType')[[speed_field]].agg( + less_than_X=(speed_field, lambda gr:gr[gr < threshold].count()), + more_than_X=(speed_field, lambda gr:gr[gr >= threshold].count()) ) -grouped_df.rename({'less_than_20':"less than 20", 'more_than_20':"more than 20"}, axis='columns', inplace=True) +grouped_df.rename({'less_than_X':lessX_name, 'more_than_X':moreX_name}, axis='columns', inplace=True) + +ax = grouped_df.plot(kind='bar', stacked=False, rot=0, figsize=(20,4)) +ax.set_xlabel("") -ax = grouped_df.plot(kind='bar', stacked=True, rot=20, figsize=(20,4)) +plt.savefig('../local_files/vta-beam-network/link_speed_graph.png') -# if numbers are required on top of bars: -# -# for (container, pdd, color) in zip(ax.containers, [0,10], ['blue', 'orange']): -# ax.bar_label(container, padding=pdd, color=color) + +# In[ ]: + + +# an example of analysis of speed of selected links subset + +east_bound_left = set([59118,80745]) # NE +west_bound_left = set([59119,80744]) # SW + +east_bound_right = set([20062,34374]) # NE +west_bound_right = set([20063,34375]) # SW + +display(network[network['linkId'].isin(east_bound_left)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']]) +display(network[network['linkId'].isin(west_bound_left)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']]) +display(network[network['linkId'].isin(east_bound_right)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']]) +display(network[network['linkId'].isin(west_bound_right)][['linkId','fromLocationX','fromLocationY','toLocationX','toLocationY','direction']]) + + +# In[ ]: + + +# an example of analysis of speed of selected links subset +# here are two bridges analysis + +left_road = set([59118,80745,59119,80744]) +right_road = set([20062,34374,20063,34375]) + +fig,axs = plt.subplots(1,2,figsize=(18,4)) +fig.subplots_adjust(wspace=0.1, hspace=0.4) + +def plot_aggregated_speed(df, set_of_ids, label, axis, save_to_csv=False): + df_filtered = df[(df['link'].isin(set_of_ids))] + directions = df_filtered['direction'].unique() + + function_name = 'mean' + for direction in sorted(directions): + df_filtered_direction = df_filtered[df_filtered['direction'] == direction] + df_grouped = df_filtered_direction.groupby('hour')[['speed_mph']].agg(function_name).reset_index() + df_grouped.plot(x='hour', y='speed_mph', marker='o', linewidth=4, linestyle='-', label=f'{function_name} speed {label} [{direction}]', ax=axis, alpha=0.5) + + if save_to_csv: + df_filtered.to_csv(f'../local_files/vta-beam-network/linkstats_1_filtered_links_{label}.csv'.replace(' ', '.')) + + +plot_aggregated_speed(full_network, left_road, 'left road', axs[0]) +plot_aggregated_speed(full_network, right_road, 'right road', axs[1]) + +fig.suptitle("Full population simulation (linkstats #1 analysis)") + +for ax in axs: + ax.set_ylim(bottom=0) + ax.legend(loc='lower right') + ax.set_ylabel('speed mph') + +# plt.savefig(f'../local_files/vta-beam-network/linkstats_1_filtered_links.png') # In[ ]: -grouped_df.sort_values('more than 20') + # In[ ]: @@ -159,11 +374,11 @@ def row_sanity_check(row): linkId_to_values = {} -links_with_speed_less_than_threshold = set(linkstats2[linkstats2['freespeed'] < threshold_ms]['link'].unique()) +links_with_speed_less_than_threshold = set(full_df[full_df['freespeed'] < threshold_ms]['link'].unique()) print(f"there are {len(links_with_speed_less_than_threshold)} links with free speed less than {threshold_ms} meters per second.") selected_columns = ['linkId','linkCapacity','linkFreeSpeed','linkLength','numberOfLanes','attributeOrigType'] -df = network2[network2['linkId'].isin(links_with_speed_less_than_threshold)][selected_columns] +df = full_df[full_df['linkId'].isin(links_with_speed_less_than_threshold)][selected_columns] df.rename(columns={'linkId':'link_id', 'linkCapacity':'capacity', 'linkFreeSpeed':'free_speed', @@ -210,15 +425,43 @@ def get_mean_speed(row): # In[ ]: -dd = { - 'l1' : [1,2,3] * 10, - 'l2' : list(range(12,12 + 15)) * 2 -} -df = pd.DataFrame.from_dict(dd) -display(df.head()) -df.groupby('l1')[['l2']].agg(l3=('l2', lambda gr: gr[gr >= 16].count()), l4=('l2', lambda gr: gr[gr < 16].count()) ) + +# In[ ]: + + +# group by and apply example + +import pandas as pd + +df = pd.DataFrame({'x':[2, 3, -10, -10], + 'y':[10, 13, 20, 30], + 'id':['a', 'a', 'b', 'b']}) + +def mindist(data): + return min(data['y'] - data['x']) + +def maxdist(data): + return max(data['y'] - data['x']) + +def fun(data): + return pd.Series({"maxdist":maxdist(data), + "mindist":mindist(data)}) + +df.groupby('id').apply(fun) + + +# In[ ]: + + + + + +# In[ ]: + + + # In[ ]: From 5dd5bf411d3c0e77d396d01bc6db134a790e6bfd Mon Sep 17 00:00:00 2001 From: Nikolay Ilyin Date: Tue, 5 Sep 2023 18:46:37 +0300 Subject: [PATCH 5/5] new line added --- docker/jupyter-enhanced/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/jupyter-enhanced/Dockerfile b/docker/jupyter-enhanced/Dockerfile index 12a07b71f81..15039f1d483 100644 --- a/docker/jupyter-enhanced/Dockerfile +++ b/docker/jupyter-enhanced/Dockerfile @@ -3,4 +3,4 @@ FROM jupyter/scipy-notebook RUN pip install geopandas pandas pygeos boto s3fs shapely gcsfs RUN pip install p2j pyproj pyrosm RUN pip install pyshp -RUN pip install boto3 \ No newline at end of file +RUN pip install boto3