City-Bureau · cruznunez · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
diff --git a/city_scrapers/spiders/losca_Board_of_ed.py b/city_scrapers/spiders/losca_Board_of_ed.py
@@ -0,0 +1,88 @@
+from city_scrapers_core.constants import BOARD
+from city_scrapers_core.items import Meeting
+from city_scrapers_core.spiders import CityScrapersSpider
+from dateutil.parser import parse
+
+
+class LoscaBoardOfEdSpider(CityScrapersSpider):
+    name = "losca_Board_of_ed"
+    agency = "Los Angeles Unified School District Board of Education"
+    timezone = "America/Chicago"
+    # original URL was https://www.lausd.org/boe
+    # they have an RSS feed. scrape that instead
+    start_urls = [
+        "https://www.lausd.org/site/RSS.aspx?DomainID=1057&ModuleInstanceID=73805&PageID=18628&PMIID=0"  # noqa
+    ]
+
+    def parse(self, response):
+        """
+        Parse meeting items from RSS feed.
+        """
+        location = {
+            "name": "LAUSD Headquarters",
+            "address": "333 South Beaudry Avenue, Board Room, Los Angeles, CA 90017",
+        }
+        for item in response.css("item"):
+            # pdb.set_trace()
+            meeting = Meeting(
+                title=self._parse_title(item),
+                description="",
+                classification=BOARD,
+                start=self._parse_start(item),
+                end=self._parse_end(item),
+                all_day=False,
+                time_notes="",
+                location=location,
+                links=self._parse_links(item),
+                source=response.url,
+            )
+
+            meeting["status"] = self._get_status(meeting)
+            meeting["id"] = self._get_id(meeting)
+
+            yield meeting
+
+    def _parse_title(self, item):
+        """
+        Parse meeting title. RSS feed titles always start with timestamp.
+        Ex: '9/19/2024 10:00 AM - 1:00 PM Children... Early Education Committee'
+        Remove timestamp from string and return title.
+        """
+        raw = item.css("title::text").get()
+        no_stamp = raw.split()[6:-1]
+        title = " ".join(no_stamp)
+        return title
+
+    def _parse_start(self, item):
+        """
+        Parse start datetime as a naive datetime object.
+        pubdate::text gives us GMT, which is 7 hours ahead of PST.
+        Get start date from title instead, since it is in the correct time zone.
+        """
+        raw = item.css("title::text").get()
+        date = " ".join(raw.split()[0:3])
+        return parse(date)
+
+    def _parse_end(self, item):
+        """
+        Parse end datetime as a naive datetime object.
+        End time is in title.
+        """
+        raw = item.css("title::text").get().split()
+        date = raw[0]
+        time = " ".join(raw[4:6])
+        return parse(f"{date} {time}")
+
+    def _parse_links(self, item):
+        """
+        Parse links. item.get() returns
+        '...</title><link>https://www.lausd.org...EventDateID=73502<pubdate>...'
+        This string does not have a closing <link> tag even though the source
+        response does. This causes item.css('link') to return an empty tag.
+        We must parse link another way. Chose to use split.
+        """
+        split = item.get().split("<link>")[1]
+        link = split.split("<pubdate>")[0]
+        links = [{"title": "Meeting Details", "href": link}]
+
+        return links
diff --git a/tests/files/losca_Board_of_ed.html b/tests/files/losca_Board_of_ed.html
diff --git a/tests/test_losca_Board_of_ed.py b/tests/test_losca_Board_of_ed.py
@@ -0,0 +1,96 @@
+from datetime import datetime
+from os.path import dirname, join
+
+import pytest
+from city_scrapers_core.constants import BOARD
+from city_scrapers_core.utils import file_response
+from freezegun import freeze_time
+
+from city_scrapers.spiders.losca_Board_of_ed import LoscaBoardOfEdSpider
+
+test_response = file_response(
+    join(dirname(__file__), "files", "losca_Board_of_ed.html"),
+    url="https://www.lausd.org/site/RSS.aspx?DomainID=1057&ModuleInstanceID=73805&PageID=18628&PMIID=0",  # noqa
+)
+spider = LoscaBoardOfEdSpider()
+
+freezer = freeze_time("2024-09-19")
+freezer.start()
+
+parsed_items = [item for item in spider.parse(test_response)]
+
+freezer.stop()
+
+
+def test_count():
+    assert len(parsed_items) == 12
+
+
+def test_title():
+    assert parsed_items[0]["title"] == "Greening Schools & Climate Resilience"
+    assert parsed_items[1]["title"] == "Curriculum and Instruction"
+
+
+def test_description():
+    assert parsed_items[0]["description"] == ""
+
+
+def test_start():
+    assert parsed_items[0]["start"] == datetime(2024, 9, 24, 13, 0)
+
+
+def test_end():
+    assert parsed_items[0]["end"] == datetime(2024, 9, 24, 16, 0)
+
+
+def test_time_notes():
+    assert parsed_items[0]["time_notes"] == ""
+
+
+def test_id():
+    assert (
+        parsed_items[0]["id"]
+        == "losca_Board_of_ed/202409241300/x/greening_schools_climate_resilience"
+    )
+
+
+def test_status():
+    assert parsed_items[0]["status"] == "tentative"
+
+
+def test_location():
+    assert parsed_items[0]["location"] == {
+        "name": "LAUSD Headquarters",
+        "address": "333 South Beaudry Avenue, Board Room, Los Angeles, CA 90017",
+    }
+
+
+def test_source():
+    assert (
+        parsed_items[0]["source"]
+        == "https://www.lausd.org/site/RSS.aspx?DomainID=1057&ModuleInstanceID=73805&PageID=18628&PMIID=0"  # noqa
+    )
+
+
+def test_links():
+    assert parsed_items[0]["links"] == [
+        {
+            "title": "Meeting Details",
+            "href": "https://www.lausd.org/site/Default.aspx?PageID=18628&amp;PageType=17&amp;DomainID=1057&amp;ModuleInstanceID=73805&amp;EventDateID=73502",  # noqa
+        }
+    ]
+    assert parsed_items[1]["links"] == [
+        {
+            "title": "Meeting Details",
+            "href": "https://www.lausd.org/site/Default.aspx?PageID=18628&amp;PageType=17&amp;DomainID=1057&amp;ModuleInstanceID=73805&amp;EventDateID=71879",  # noqa
+        }
+    ]
+
+
+def test_classification():
+    assert parsed_items[0]["classification"] == BOARD
+
+
+@pytest.mark.parametrize("item", parsed_items)
+def test_all_day(item):
+    assert item["all_day"] is False