Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix clean osm data #628

Merged
merged 3 commits into from
Mar 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions scripts/build_osm_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,6 @@ def set_lines_ids(lines, buses, distance_crs):
linesepsg = lines.to_crs(distance_crs)

for i, row in tqdm(linesepsg.iterrows(), **tqdm_kwargs_line_ids):
row["dc"] = float(row["tag_frequency"]) == 0

# select buses having the voltage level of the current line
buses_sel = busesepsg[
(buses["voltage"] == row["voltage"]) & (buses["dc"] == row["dc"])
Expand Down Expand Up @@ -713,13 +711,11 @@ def force_ac_lines(df, col="tag_frequency"):
When it is artificially converted into AC, this feature is lost.
However, for debugging and preliminary analysis, it can be useful to bypass problems.
"""
DC_freq = 0.0
DC_lines = (df["tag_frequency"] - DC_freq).abs() <= 0.01

# TODO: default frequency may be by country
default_frequency = 50
default_ac_frequency = 50

df[df[DC_lines].index] = default_frequency
df["tag_frequency"] = default_ac_frequency
df["dc"] = False

return df

Expand Down
13 changes: 7 additions & 6 deletions scripts/clean_osm_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ def add_line_endings_tosubstations(substations, lines):
bus_s = gpd.GeoDataFrame(columns=substations.columns, crs=substations.crs)
bus_e = gpd.GeoDataFrame(columns=substations.columns, crs=substations.crs)

is_ac = lines["tag_frequency"].astype(float) != 0

# Read information from line.csv
bus_s[["voltage", "country"]] = lines[["voltage", "country"]].astype(str)
bus_s["geometry"] = lines.geometry.boundary.map(
Expand All @@ -92,7 +90,7 @@ def add_line_endings_tosubstations(substations, lines):
+ 1
+ bus_s.index
)
bus_s["dc"] = ~is_ac
bus_s["dc"] = lines["dc"]

bus_e[["voltage", "country"]] = lines[["voltage", "country"]].astype(str)
bus_e["geometry"] = lines.geometry.boundary.map(
Expand All @@ -101,7 +99,7 @@ def add_line_endings_tosubstations(substations, lines):
bus_e["lon"] = bus_e["geometry"].map(lambda p: p.x if p != None else None)
bus_e["lat"] = bus_e["geometry"].map(lambda p: p.y if p != None else None)
bus_e["bus_id"] = bus_s["bus_id"].max() + 1 + bus_e.index
bus_e["dc"] = ~is_ac
bus_e["dc"] = lines["dc"]

bus_all = pd.concat([bus_s, bus_e], ignore_index=True)

Expand Down Expand Up @@ -195,20 +193,22 @@ def filter_voltage(df, threshold_voltage=35000):
return df


def filter_frequency(df, accepted_values=(50, 60, 0), threshold=0.1):
    """
    Keep only the rows whose frequency is close to one of the accepted values.

    The dataframe is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "tag_frequency" column. Values that cannot be parsed
        as numbers are treated as missing and their rows are dropped.
    accepted_values : iterable of float
        Frequencies to keep. A row is kept when its frequency is within
        ``threshold`` of at least one of them. An empty iterable rejects
        every row.
    threshold : float
        Maximum absolute deviation from an accepted value; also used as
        the tolerance around 0 when flagging rows in the "dc" column.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, restricted to the accepted rows, with
        "tag_frequency" converted to float and a boolean "dc" column that
        is True where ``abs(tag_frequency) <= threshold``.
    """
    # Non-numeric entries become NaN and are removed right after
    df["tag_frequency"] = pd.to_numeric(df["tag_frequency"], errors="coerce").astype(
        float
    )
    df.dropna(subset=["tag_frequency"], inplace=True)

    # Start from "nothing accepted" so that an empty accepted_values
    # rejects all rows instead of failing
    frequency = df["tag_frequency"]
    accepted_rows = pd.Series(False, index=df.index, dtype=bool)
    for f_val in accepted_values:
        accepted_rows |= (frequency - f_val).abs() <= threshold

    df.drop(df[~accepted_rows].index, inplace=True)

    # Rows whose frequency is within the tolerance of zero are flagged as DC
    df["dc"] = df["tag_frequency"].abs() <= threshold

    return df


Expand Down Expand Up @@ -270,6 +270,7 @@ def prepare_lines_df(df_lines):
"under_construction",
"tag_type",
"tag_frequency",
"dc",
"cables",
"geometry",
"country",
Expand Down