Skip to content

Commit

Permalink
Merge pull request #2374 from freelawproject/iquery-pages-support-for-recap-uploads
Browse files Browse the repository at this point in the history

Accept recap uploads for docket iQuery pages
  • Loading branch information
mlissner authored Nov 27, 2022
2 parents e043f42 + c166e50 commit 188c575
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 0 deletions.
23 changes: 23 additions & 0 deletions cl/recap/migrations/0009_alter_processingqueue_upload_type_noop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.16 on 2022-11-22 22:57

from django.db import migrations, models


class Migration(migrations.Migration):
    """No-op schema migration.

    Refreshes the ``choices`` metadata on the ``upload_type`` fields of
    ``PacerHtmlFiles`` and ``ProcessingQueue`` to include the new
    "Case query page" type (12). Choices are enforced in Python, not in
    the database, so this emits no DDL (see the generated SQL file).
    """

    # The full, updated set of upload-type choices, shared by both fields.
    UPLOAD_TYPE_CHOICES = [
        (1, "HTML Docket"),
        (2, "HTML attachment page"),
        (3, "PDF"),
        (4, "Docket history report"),
        (5, "Appellate HTML docket"),
        (6, "Appellate HTML attachment page"),
        (7, "Internet Archive XML docket"),
        (8, "Case report (iquery.pl) page"),
        (9, "Claims register page"),
        (10, "Zip archive of RECAP Documents"),
        (11, "Email in the SES storage format"),
        (12, "Case query page"),
    ]

    dependencies = [
        ("recap", "0008_alter_nos_noop"),
    ]

    operations = [
        migrations.AlterField(
            model_name="pacerhtmlfiles",
            name="upload_type",
            field=models.SmallIntegerField(
                choices=UPLOAD_TYPE_CHOICES,
                help_text="The type of object that is uploaded",
            ),
        ),
        migrations.AlterField(
            model_name="processingqueue",
            name="upload_type",
            field=models.SmallIntegerField(
                choices=UPLOAD_TYPE_CHOICES,
                help_text="The type of object that is uploaded",
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Generated SQL for migration 0009_alter_processingqueue_upload_type_noop.
-- Altering a Django field's "choices" changes Python-side validation only,
-- so no DDL appears between BEGIN and COMMIT: this is a database no-op,
-- as the migration's "_noop" name indicates.
BEGIN;
--
-- Alter field upload_type on pacerhtmlfiles
--
--
-- Alter field upload_type on processingqueue
--
COMMIT;
2 changes: 2 additions & 0 deletions cl/recap/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class UPLOAD_TYPE:
CLAIMS_REGISTER = 9
DOCUMENT_ZIP = 10
SES_EMAIL = 11
CASE_QUERY_PAGE = 12

NAMES = (
(DOCKET, "HTML Docket"),
Expand All @@ -33,6 +34,7 @@ class UPLOAD_TYPE:
(CLAIMS_REGISTER, "Claims register page"),
(DOCUMENT_ZIP, "Zip archive of RECAP Documents"),
(SES_EMAIL, "Email in the SES storage format"),
(CASE_QUERY_PAGE, "Case query page"),
)


Expand Down
98 changes: 98 additions & 0 deletions cl/recap/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from juriscraper.pacer import (
AppellateDocketReport,
AttachmentPage,
CaseQuery,
ClaimsRegister,
DocketHistoryReport,
DocketReport,
Expand Down Expand Up @@ -118,6 +119,11 @@ def process_recap_upload(pq: ProcessingQueue) -> None:
process_recap_claims_register.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.DOCUMENT_ZIP:
process_recap_zip.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.CASE_QUERY_PAGE:
chain(
process_case_query_page.s(pq.pk),
add_or_update_recap_docket.s(),
).apply_async()


def do_pacer_fetch(fq: PacerFetchQueue):
Expand Down Expand Up @@ -823,6 +829,98 @@ def process_recap_docket_history_report(self, pk):
}


@app.task(
    bind=True, max_retries=3, interval_start=5 * 60, interval_step=5 * 60
)
def process_case_query_page(self, pk):
    """Process the case query (iquery.pl) page.

    :param pk: The primary key of the ProcessingQueue item to work on.
    :returns: A dict with the docket PK and a ``content_updated`` flag
        indicating whether the docket needs Solr re-indexing, or None when
        the upload is invalid or processing fails (in which case the rest
        of the Celery chain is aborted via ``self.request.chain = None``).
    """
    pq = ProcessingQueue.objects.get(pk=pk)
    mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info(f"Processing RECAP item (debug is: {pq.debug}): {pq}")

    try:
        # Read via a context manager so the storage file handle is closed
        # deterministically; FieldFile.read() opens the file lazily and
        # would otherwise leave the handle dangling.
        with pq.filepath_local.open("rb") as f:
            text = f.read().decode()
    except IOError as exc:
        msg = f"Internal processing error ({exc.errno}: {exc.strerror})."
        if (self.request.retries == self.max_retries) or pq.debug:
            mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
            return None
        else:
            mark_pq_status(pq, msg, PROCESSING_STATUS.QUEUED_FOR_RETRY)
            raise self.retry(exc=exc)

    report = CaseQuery(map_cl_to_pacer_id(pq.court_id))
    report._parse_text(text)
    data = report.data
    logger.info(f"Parsing completed for item {pq}")

    if data == {}:
        # Bad docket iquery page; mark it invalid and stop the chain.
        msg = "Not a valid case query page upload."
        mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT)
        self.request.chain = None
        return None

    # Merge the contents of the docket into CL.
    d = find_docket_object(
        pq.court_id, pq.pacer_case_id, data["docket_number"]
    )
    current_case_name = d.case_name
    d.add_recap_source()
    update_docket_metadata(d, data)

    # Update the docket in SOLR if the case name has changed and the
    # (pre-existing) docket contains docket entries.
    content_updated = False
    if current_case_name != d.case_name and d.pk:
        if d.docket_entries.exists():
            content_updated = True

    if pq.debug:
        # Debug mode: nothing is persisted, so nothing was updated; stop
        # the chain here.
        mark_pq_successful(pq, d_id=d.pk)
        self.request.chain = None
        return {"docket_pk": d.pk, "content_updated": False}

    try:
        d.save()
        add_bankruptcy_data_to_docket(d, data)
    except IntegrityError as exc:
        logger.warning(
            "Race condition experienced while attempting docket save."
        )
        error_message = "Unable to save docket due to IntegrityError."
        if self.request.retries == self.max_retries:
            mark_pq_status(pq, error_message, PROCESSING_STATUS.FAILED)
            self.request.chain = None
            return None
        else:
            mark_pq_status(
                pq, error_message, PROCESSING_STATUS.QUEUED_FOR_RETRY
            )
            raise self.retry(exc=exc)

    # Add the HTML to the docket in case we need it someday.
    pacer_file = PacerHtmlFiles(
        content_object=d, upload_type=UPLOAD_TYPE.CASE_QUERY_PAGE
    )
    pacer_file.filepath.save(
        # We only care about the ext w/S3PrivateUUIDStorageTest
        "case_report.html",
        ContentFile(text.encode()),
    )

    mark_pq_successful(pq, d_id=d.pk)
    return {
        "docket_pk": d.pk,
        "content_updated": content_updated,
    }


@app.task(bind=True, max_retries=3, ignore_result=True)
def process_recap_appellate_docket(self, pk):
"""Process an uploaded appellate docket from the RECAP API endpoint.
Expand Down
20 changes: 20 additions & 0 deletions cl/recap/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,26 @@ def test_ensure_no_users_in_response(self, mock):
j[bad_key]
mock.assert_called()

def test_uploading_a_case_query_page(self, mock):
"""Can we upload a docket iquery page and have it be saved correctly?
Note that this works fine even though we're not actually uploading a
docket due to the mock.
"""
self.data.update(
{"upload_type": UPLOAD_TYPE.CASE_QUERY_PAGE, "document_number": ""}
)
del self.data["pacer_doc_id"]
r = self.client.post(self.path, self.data)
self.assertEqual(r.status_code, HTTP_201_CREATED)

j = json.loads(r.content)
path = reverse(
"processingqueue-detail", kwargs={"version": "v3", "pk": j["id"]}
)
r = self.client.get(path)
self.assertEqual(r.status_code, HTTP_200_OK)


@mock.patch("cl.recap.tasks.DocketReport", new=fakes.FakeDocketReport)
@mock.patch(
Expand Down

0 comments on commit 188c575

Please sign in to comment.