Skip to content

Commit

Permalink
Merge pull request #2374 from freelawproject/iquery-pages-support-for-recap-uploads
Browse files Browse the repository at this point in the history

Accept recap uploads for docket iQuery pages
  • Loading branch information
mlissner authored Nov 27, 2022
2 parents e043f42 + c166e50 commit 188c575
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 0 deletions.
23 changes: 23 additions & 0 deletions cl/recap/migrations/0009_alter_processingqueue_upload_type_noop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.16 on 2022-11-22 22:57

from django.db import migrations, models


class Migration(migrations.Migration):
    """No-op schema migration.

    Refreshes the ``choices`` metadata on the ``upload_type`` fields of
    ``PacerHtmlFiles`` and ``ProcessingQueue`` to include the new
    "Case query page" type (12). Choices are enforced in Python, not in
    the database, so this emits no DDL (see the generated SQL file).
    """

    # The full, updated set of upload-type choices, shared by both fields.
    UPLOAD_TYPE_CHOICES = [
        (1, "HTML Docket"),
        (2, "HTML attachment page"),
        (3, "PDF"),
        (4, "Docket history report"),
        (5, "Appellate HTML docket"),
        (6, "Appellate HTML attachment page"),
        (7, "Internet Archive XML docket"),
        (8, "Case report (iquery.pl) page"),
        (9, "Claims register page"),
        (10, "Zip archive of RECAP Documents"),
        (11, "Email in the SES storage format"),
        (12, "Case query page"),
    ]

    dependencies = [
        ("recap", "0008_alter_nos_noop"),
    ]

    operations = [
        migrations.AlterField(
            model_name="pacerhtmlfiles",
            name="upload_type",
            field=models.SmallIntegerField(
                choices=UPLOAD_TYPE_CHOICES,
                help_text="The type of object that is uploaded",
            ),
        ),
        migrations.AlterField(
            model_name="processingqueue",
            name="upload_type",
            field=models.SmallIntegerField(
                choices=UPLOAD_TYPE_CHOICES,
                help_text="The type of object that is uploaded",
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Generated SQL for migration 0009_alter_processingqueue_upload_type_noop.
-- Altering a Django field's "choices" changes Python-side validation only,
-- so no DDL appears between BEGIN and COMMIT: this is a database no-op,
-- as the migration's "_noop" name indicates.
BEGIN;
--
-- Alter field upload_type on pacerhtmlfiles
--
--
-- Alter field upload_type on processingqueue
--
COMMIT;
2 changes: 2 additions & 0 deletions cl/recap/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class UPLOAD_TYPE:
CLAIMS_REGISTER = 9
DOCUMENT_ZIP = 10
SES_EMAIL = 11
CASE_QUERY_PAGE = 12

NAMES = (
(DOCKET, "HTML Docket"),
Expand All @@ -33,6 +34,7 @@ class UPLOAD_TYPE:
(CLAIMS_REGISTER, "Claims register page"),
(DOCUMENT_ZIP, "Zip archive of RECAP Documents"),
(SES_EMAIL, "Email in the SES storage format"),
(CASE_QUERY_PAGE, "Case query page"),
)


Expand Down
98 changes: 98 additions & 0 deletions cl/recap/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from juriscraper.pacer import (
AppellateDocketReport,
AttachmentPage,
CaseQuery,
ClaimsRegister,
DocketHistoryReport,
DocketReport,
Expand Down Expand Up @@ -118,6 +119,11 @@ def process_recap_upload(pq: ProcessingQueue) -> None:
process_recap_claims_register.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.DOCUMENT_ZIP:
process_recap_zip.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.CASE_QUERY_PAGE:
chain(
process_case_query_page.s(pq.pk),
add_or_update_recap_docket.s(),
).apply_async()


def do_pacer_fetch(fq: PacerFetchQueue):
Expand Down Expand Up @@ -823,6 +829,98 @@ def process_recap_docket_history_report(self, pk):
}


@app.task(
    bind=True, max_retries=3, interval_start=5 * 60, interval_step=5 * 60
)
def process_case_query_page(self, pk):
    """Process the case query (iquery.pl) page.

    :param pk: The primary key of the ProcessingQueue item to work on.
    :returns: A dict with the docket PK and a ``content_updated`` flag
        indicating whether the docket needs Solr re-indexing, or None when
        the upload is invalid or processing fails (in which case the rest
        of the Celery chain is aborted via ``self.request.chain = None``).
    """
    pq = ProcessingQueue.objects.get(pk=pk)
    mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info(f"Processing RECAP item (debug is: {pq.debug}): {pq}")

    try:
        # Read via a context manager so the storage file handle is closed
        # deterministically; FieldFile.read() opens the file lazily and
        # would otherwise leave the handle dangling.
        with pq.filepath_local.open("rb") as f:
            text = f.read().decode()
    except IOError as exc:
        msg = f"Internal processing error ({exc.errno}: {exc.strerror})."
        if (self.request.retries == self.max_retries) or pq.debug:
            mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
            return None
        else:
            mark_pq_status(pq, msg, PROCESSING_STATUS.QUEUED_FOR_RETRY)
            raise self.retry(exc=exc)

    report = CaseQuery(map_cl_to_pacer_id(pq.court_id))
    report._parse_text(text)
    data = report.data
    logger.info(f"Parsing completed for item {pq}")

    if data == {}:
        # Bad docket iquery page; mark it invalid and stop the chain.
        msg = "Not a valid case query page upload."
        mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT)
        self.request.chain = None
        return None

    # Merge the contents of the docket into CL.
    d = find_docket_object(
        pq.court_id, pq.pacer_case_id, data["docket_number"]
    )
    current_case_name = d.case_name
    d.add_recap_source()
    update_docket_metadata(d, data)

    # Update the docket in SOLR if the case name has changed and the
    # (pre-existing) docket contains docket entries.
    content_updated = False
    if current_case_name != d.case_name and d.pk:
        if d.docket_entries.exists():
            content_updated = True

    if pq.debug:
        # Debug mode: nothing is persisted, so nothing was updated; stop
        # the chain here.
        mark_pq_successful(pq, d_id=d.pk)
        self.request.chain = None
        return {"docket_pk": d.pk, "content_updated": False}

    try:
        d.save()
        add_bankruptcy_data_to_docket(d, data)
    except IntegrityError as exc:
        logger.warning(
            "Race condition experienced while attempting docket save."
        )
        error_message = "Unable to save docket due to IntegrityError."
        if self.request.retries == self.max_retries:
            mark_pq_status(pq, error_message, PROCESSING_STATUS.FAILED)
            self.request.chain = None
            return None
        else:
            mark_pq_status(
                pq, error_message, PROCESSING_STATUS.QUEUED_FOR_RETRY
            )
            raise self.retry(exc=exc)

    # Add the HTML to the docket in case we need it someday.
    pacer_file = PacerHtmlFiles(
        content_object=d, upload_type=UPLOAD_TYPE.CASE_QUERY_PAGE
    )
    pacer_file.filepath.save(
        # We only care about the ext w/S3PrivateUUIDStorageTest
        "case_report.html",
        ContentFile(text.encode()),
    )

    mark_pq_successful(pq, d_id=d.pk)
    return {
        "docket_pk": d.pk,
        "content_updated": content_updated,
    }


@app.task(bind=True, max_retries=3, ignore_result=True)
def process_recap_appellate_docket(self, pk):
"""Process an uploaded appellate docket from the RECAP API endpoint.
Expand Down
20 changes: 20 additions & 0 deletions cl/recap/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,26 @@ def test_ensure_no_users_in_response(self, mock):
j[bad_key]
mock.assert_called()

def test_uploading_a_case_query_page(self, mock):
"""Can we upload a docket iquery page and have it be saved correctly?
Note that this works fine even though we're not actually uploading a
docket due to the mock.
"""
self.data.update(
{"upload_type": UPLOAD_TYPE.CASE_QUERY_PAGE, "document_number": ""}
)
del self.data["pacer_doc_id"]
r = self.client.post(self.path, self.data)
self.assertEqual(r.status_code, HTTP_201_CREATED)

j = json.loads(r.content)
path = reverse(
"processingqueue-detail", kwargs={"version": "v3", "pk": j["id"]}
)
r = self.client.get(path)
self.assertEqual(r.status_code, HTTP_200_OK)


@mock.patch("cl.recap.tasks.DocketReport", new=fakes.FakeDocketReport)
@mock.patch(
Expand Down

0 comments on commit 188c575

Please sign in to comment.