Skip to content

Commit

Permalink
coderabbitai: use regex for start and end parsing. fallback to split()
Browse files Browse the repository at this point in the history
  • Loading branch information
Cruz Núñez authored and Cruz Núñez committed Sep 24, 2024
1 parent e05b5cc commit 24cabec
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions city_scrapers/spiders/losca_Board_of_ed.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,31 @@ def _parse_start(self, item):
Get start date from title instead, since it is in the correct time zone.
"""
raw = item.css("title::text").get()
date = " ".join(raw.split()[0:3])
return parse(date)
match = re.search(r"(\d{1,2}/\d{1,2}/\d{4}\s+\d{1,2}:\d{2}\s+[AP]M)", raw)
if match:
return parse(match.group(1))
else:
# Fallback to the original method if regex doesn't match
return parse(" ".join(raw.split()[0:3]))

def _parse_end(self, item):
    """
    Parse end datetime as a naive datetime object.

    The end time is read from the page title, which is expected to look like
    "<M/D/YYYY> <H:MM AM|PM> - <H:MM AM|PM> ...". The end datetime is the
    date combined with the time that follows the dash.

    Args:
        item: selector-like object; its ``title::text`` is queried via
            ``item.css(...).get()``.

    Returns:
        The result of ``parse("<date> <end time>")``
        (NOTE(review): ``parse`` is presumably dateutil's parser - confirm).
    """
    raw = item.css("title::text").get()
    # Group 1 is the date, group 2 is the end time after the dash. The start
    # time in between is matched but not captured.
    pattern = (
        r"(\d{1,2}/\d{1,2}/\d{4})\s+\d{1,2}:\d{2}\s+[AP]M"
        r"\s+-\s+(\d{1,2}:\d{2}\s+[AP]M)"
    )
    match = re.search(pattern, raw)
    if match:
        # Take the date from the same match as the end time instead of from
        # the first whitespace-separated token, so both pieces stay in sync
        # even if the title has text before the date.
        return parse(f"{match.group(1)} {match.group(2)}")
    # Fall back to the positional method if the regex doesn't match:
    # token 0 is the date, tokens 4-5 are the end time and its AM/PM marker.
    tokens = raw.split()
    return parse(f"{tokens[0]} {' '.join(tokens[4:6])}")

def _parse_links(self, item):
"""
Expand Down

0 comments on commit 24cabec

Please sign in to comment.