Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accept recap uploads for docket iQuery pages #2374

Merged
merged 3 commits into from
Nov 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions cl/recap/migrations/0009_alter_processingqueue_upload_type_noop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.16 on 2022-11-22 22:57

from django.db import migrations, models


class Migration(migrations.Migration):
    """Refresh ``upload_type`` choices to include the new case query page.

    Choices live only in Python/migration state; applying this migration
    emits no SQL — hence the ``_noop`` suffix in the filename.
    """

    # Shared by both altered fields; mirrors UPLOAD_TYPE.NAMES in
    # cl/recap/models.py.
    _UPLOAD_TYPE_CHOICES = [
        (1, "HTML Docket"),
        (2, "HTML attachment page"),
        (3, "PDF"),
        (4, "Docket history report"),
        (5, "Appellate HTML docket"),
        (6, "Appellate HTML attachment page"),
        (7, "Internet Archive XML docket"),
        (8, "Case report (iquery.pl) page"),
        (9, "Claims register page"),
        (10, "Zip archive of RECAP Documents"),
        (11, "Email in the SES storage format"),
        (12, "Case query page"),
    ]

    dependencies = [
        ("recap", "0008_alter_nos_noop"),
    ]

    operations = [
        migrations.AlterField(
            model_name="pacerhtmlfiles",
            name="upload_type",
            field=models.SmallIntegerField(
                choices=_UPLOAD_TYPE_CHOICES,
                help_text="The type of object that is uploaded",
            ),
        ),
        migrations.AlterField(
            model_name="processingqueue",
            name="upload_type",
            field=models.SmallIntegerField(
                choices=_UPLOAD_TYPE_CHOICES,
                help_text="The type of object that is uploaded",
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Auto-generated SQL for the 0009 "_noop" migration: the upload_type
-- choices change is recorded in Django migration state only, so no DDL
-- is required and the transaction body is intentionally empty.
BEGIN;
--
-- Alter field upload_type on pacerhtmlfiles
--
--
-- Alter field upload_type on processingqueue
--
COMMIT;
2 changes: 2 additions & 0 deletions cl/recap/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class UPLOAD_TYPE:
CLAIMS_REGISTER = 9
DOCUMENT_ZIP = 10
SES_EMAIL = 11
CASE_QUERY_PAGE = 12

NAMES = (
(DOCKET, "HTML Docket"),
Expand All @@ -33,6 +34,7 @@ class UPLOAD_TYPE:
(CLAIMS_REGISTER, "Claims register page"),
(DOCUMENT_ZIP, "Zip archive of RECAP Documents"),
(SES_EMAIL, "Email in the SES storage format"),
(CASE_QUERY_PAGE, "Case query page"),
)


Expand Down
98 changes: 98 additions & 0 deletions cl/recap/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from juriscraper.pacer import (
AppellateDocketReport,
AttachmentPage,
CaseQuery,
ClaimsRegister,
DocketHistoryReport,
DocketReport,
Expand Down Expand Up @@ -118,6 +119,11 @@ def process_recap_upload(pq: ProcessingQueue) -> None:
process_recap_claims_register.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.DOCUMENT_ZIP:
process_recap_zip.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.CASE_QUERY_PAGE:
chain(
process_case_query_page.s(pq.pk),
add_or_update_recap_docket.s(),
).apply_async()


def do_pacer_fetch(fq: PacerFetchQueue):
Expand Down Expand Up @@ -823,6 +829,98 @@ def process_recap_docket_history_report(self, pk):
}


@app.task(
    bind=True, max_retries=3, interval_start=5 * 60, interval_step=5 * 60
)
def process_case_query_page(self, pk: int):
    """Process the case query (iquery.pl) page.

    Parses the uploaded HTML with juriscraper's CaseQuery report, merges the
    resulting metadata into a new or existing Docket, archives the raw HTML,
    and reports whether downstream Solr re-indexing is needed.

    Transient failures (unreadable upload file, IntegrityError race on docket
    save) are retried up to ``max_retries`` times with a growing interval.

    :param self: The bound celery task instance (``bind=True``).
    :param pk: The primary key of the processing queue item you want to work on
    :returns: A dict with ``docket_pk`` and ``content_updated`` (whether the
        docket needs Solr re-indexing), or None on failure or invalid content.
    """

    pq = ProcessingQueue.objects.get(pk=pk)
    mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info(f"Processing RECAP item (debug is: {pq.debug}): {pq}")

    try:
        text = pq.filepath_local.read().decode()
    except IOError as exc:
        # Storage read failed. Retry with backoff unless retries are
        # exhausted (or we're in debug mode), in which case mark it failed.
        msg = f"Internal processing error ({exc.errno}: {exc.strerror})."
        if (self.request.retries == self.max_retries) or pq.debug:
            mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
            return None
        else:
            mark_pq_status(pq, msg, PROCESSING_STATUS.QUEUED_FOR_RETRY)
            raise self.retry(exc=exc)

    report = CaseQuery(map_cl_to_pacer_id(pq.court_id))
    report._parse_text(text)
    data = report.data
    logger.info(f"Parsing completed for item {pq}")

    if data == {}:
        # Bad docket iquery page.
        msg = "Not a valid case query page upload."
        mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT)
        # Abort the rest of the chain (add_or_update_recap_docket) since
        # there is no docket data to pass along.
        self.request.chain = None
        return None

    # Merge the contents of the docket into CL.
    d = find_docket_object(
        pq.court_id, pq.pacer_case_id, data["docket_number"]
    )
    current_case_name = d.case_name
    d.add_recap_source()
    update_docket_metadata(d, data)

    # Update the docket in SOLR if the case name has changed and contains
    # docket entries
    content_updated = False
    if current_case_name != d.case_name and d.pk:
        if d.docket_entries.exists():
            content_updated = True

    if pq.debug:
        # Debug mode: report success and stop the chain without saving
        # anything to the database.
        mark_pq_successful(pq, d_id=d.pk)
        self.request.chain = None
        return {"docket_pk": d.pk, "content_updated": False}

    try:
        d.save()
        add_bankruptcy_data_to_docket(d, data)
    except IntegrityError as exc:
        # Presumably another worker saved a conflicting docket concurrently;
        # retry so the lookup can find it, or fail once retries run out.
        logger.warning(
            "Race condition experienced while attempting docket save."
        )
        error_message = "Unable to save docket due to IntegrityError."
        if self.request.retries == self.max_retries:
            mark_pq_status(pq, error_message, PROCESSING_STATUS.FAILED)
            self.request.chain = None
            return None
        else:
            mark_pq_status(
                pq, error_message, PROCESSING_STATUS.QUEUED_FOR_RETRY
            )
            raise self.retry(exc=exc)

    # Add the HTML to the docket in case we need it someday.
    pacer_file = PacerHtmlFiles(
        content_object=d, upload_type=UPLOAD_TYPE.CASE_QUERY_PAGE
    )
    pacer_file.filepath.save(
        # We only care about the ext w/S3PrivateUUIDStorageTest
        "case_report.html",
        ContentFile(text.encode()),
    )

    mark_pq_successful(pq, d_id=d.pk)
    return {
        "docket_pk": d.pk,
        "content_updated": content_updated,
    }


@app.task(bind=True, max_retries=3, ignore_result=True)
def process_recap_appellate_docket(self, pk):
"""Process an uploaded appellate docket from the RECAP API endpoint.
Expand Down
20 changes: 20 additions & 0 deletions cl/recap/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,26 @@ def test_ensure_no_users_in_response(self, mock):
j[bad_key]
mock.assert_called()

def test_uploading_a_case_query_page(self, mock):
    """Verify a docket iquery page upload is accepted and retrievable.

    The mock means no real docket processing occurs, but the POST/GET
    round-trip still exercises the upload endpoint end to end.
    """
    self.data["upload_type"] = UPLOAD_TYPE.CASE_QUERY_PAGE
    self.data["document_number"] = ""
    del self.data["pacer_doc_id"]

    create_response = self.client.post(self.path, self.data)
    self.assertEqual(create_response.status_code, HTTP_201_CREATED)

    created = json.loads(create_response.content)
    detail_url = reverse(
        "processingqueue-detail",
        kwargs={"version": "v3", "pk": created["id"]},
    )
    get_response = self.client.get(detail_url)
    self.assertEqual(get_response.status_code, HTTP_200_OK)


@mock.patch("cl.recap.tasks.DocketReport", new=fakes.FakeDocketReport)
@mock.patch(
Expand Down