Skip to content

Commit

Permalink
fix(recap): Use CaseQuery report to parse iquery pages uploaded via recap extension
Browse files Browse the repository at this point in the history
  • Loading branch information
albertisfu committed Nov 22, 2022
1 parent 9832fb9 commit 5aaf1fd
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 3.2.16 on 2022-11-21 20:07
# Generated by Django 3.2.16 on 2022-11-22 22:57

from django.db import migrations, models

Expand All @@ -13,11 +13,11 @@ class Migration(migrations.Migration):
migrations.AlterField(
model_name='pacerhtmlfiles',
name='upload_type',
field=models.SmallIntegerField(choices=[(1, 'HTML Docket'), (2, 'HTML attachment page'), (3, 'PDF'), (4, 'Docket history report'), (5, 'Appellate HTML docket'), (6, 'Appellate HTML attachment page'), (7, 'Internet Archive XML docket'), (8, 'Case report (iquery.pl) page'), (9, 'Claims register page'), (10, 'Zip archive of RECAP Documents'), (11, 'Email in the SES storage format'), (12, 'Docket iQuery page')], help_text='The type of object that is uploaded'),
field=models.SmallIntegerField(choices=[(1, 'HTML Docket'), (2, 'HTML attachment page'), (3, 'PDF'), (4, 'Docket history report'), (5, 'Appellate HTML docket'), (6, 'Appellate HTML attachment page'), (7, 'Internet Archive XML docket'), (8, 'Case report (iquery.pl) page'), (9, 'Claims register page'), (10, 'Zip archive of RECAP Documents'), (11, 'Email in the SES storage format'), (12, 'Case query page')], help_text='The type of object that is uploaded'),
),
migrations.AlterField(
model_name='processingqueue',
name='upload_type',
field=models.SmallIntegerField(choices=[(1, 'HTML Docket'), (2, 'HTML attachment page'), (3, 'PDF'), (4, 'Docket history report'), (5, 'Appellate HTML docket'), (6, 'Appellate HTML attachment page'), (7, 'Internet Archive XML docket'), (8, 'Case report (iquery.pl) page'), (9, 'Claims register page'), (10, 'Zip archive of RECAP Documents'), (11, 'Email in the SES storage format'), (12, 'Docket iQuery page')], help_text='The type of object that is uploaded'),
field=models.SmallIntegerField(choices=[(1, 'HTML Docket'), (2, 'HTML attachment page'), (3, 'PDF'), (4, 'Docket history report'), (5, 'Appellate HTML docket'), (6, 'Appellate HTML attachment page'), (7, 'Internet Archive XML docket'), (8, 'Case report (iquery.pl) page'), (9, 'Claims register page'), (10, 'Zip archive of RECAP Documents'), (11, 'Email in the SES storage format'), (12, 'Case query page')], help_text='The type of object that is uploaded'),
),
]
4 changes: 2 additions & 2 deletions cl/recap/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class UPLOAD_TYPE:
CLAIMS_REGISTER = 9
DOCUMENT_ZIP = 10
SES_EMAIL = 11
IQUERY_PAGE = 12
CASE_QUERY_PAGE = 12

NAMES = (
(DOCKET, "HTML Docket"),
Expand All @@ -34,7 +34,7 @@ class UPLOAD_TYPE:
(CLAIMS_REGISTER, "Claims register page"),
(DOCUMENT_ZIP, "Zip archive of RECAP Documents"),
(SES_EMAIL, "Email in the SES storage format"),
(IQUERY_PAGE, "Docket iQuery page"),
(CASE_QUERY_PAGE, "Case query page"),
)


Expand Down
31 changes: 22 additions & 9 deletions cl/recap/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from juriscraper.pacer import (
AppellateDocketReport,
AttachmentPage,
CaseQuery,
ClaimsRegister,
DocketHistoryReport,
DocketReport,
Expand Down Expand Up @@ -118,8 +119,11 @@ def process_recap_upload(pq: ProcessingQueue) -> None:
process_recap_claims_register.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.DOCUMENT_ZIP:
process_recap_zip.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.IQUERY_PAGE:
process_docket_iquery_page.delay(pq.pk)
elif pq.upload_type == UPLOAD_TYPE.CASE_QUERY_PAGE:
chain(
process_case_query_page.s(pq.pk),
add_or_update_recap_docket.s(),
).apply_async()


def do_pacer_fetch(fq: PacerFetchQueue):
Expand Down Expand Up @@ -828,8 +832,8 @@ def process_recap_docket_history_report(self, pk):
@app.task(
bind=True, max_retries=3, interval_start=5 * 60, interval_step=5 * 60
)
def process_docket_iquery_page(self, pk):
"""Process the Docket iQuery page.
def process_case_query_page(self, pk):
"""Process the case query (iquery.pl) page.
:param pk: The primary key of the processing queue item you want to work on
:returns: A dict indicating whether the docket needs Solr re-indexing.
Expand All @@ -850,14 +854,14 @@ def process_docket_iquery_page(self, pk):
mark_pq_status(pq, msg, PROCESSING_STATUS.QUEUED_FOR_RETRY)
raise self.retry(exc=exc)

report = DocketHistoryReport(map_cl_to_pacer_id(pq.court_id))
report = CaseQuery(map_cl_to_pacer_id(pq.court_id))
report._parse_text(text)
data = report.data
logger.info(f"Parsing completed for item {pq}")

if data == {}:
# Bad docket iquery page.
msg = "Not a valid docket iquery page upload."
msg = "Not a valid case query page upload."
mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT)
self.request.chain = None
return None
Expand All @@ -866,16 +870,24 @@ def process_docket_iquery_page(self, pk):
d = find_docket_object(
pq.court_id, pq.pacer_case_id, data["docket_number"]
)
content_updated = False
current_case_name = d.case_name
d.add_recap_source()
update_docket_metadata(d, data)

if current_case_name != d.case_name or not d.pk:
# This docket should be added to Solr or updated since is new or the
# case name has changed.
content_updated = True

if pq.debug:
mark_pq_successful(pq, d_id=d.pk)
self.request.chain = None
return {"docket_pk": d.pk, "content_updated": False}

try:
d.save()
add_bankruptcy_data_to_docket(d, data)
except IntegrityError as exc:
logger.warning(
"Race condition experienced while attempting docket save."
Expand All @@ -893,17 +905,18 @@ def process_docket_iquery_page(self, pk):

# Add the HTML to the docket in case we need it someday.
pacer_file = PacerHtmlFiles(
content_object=d, upload_type=UPLOAD_TYPE.IQUERY_PAGE
content_object=d, upload_type=UPLOAD_TYPE.CASE_QUERY_PAGE
)
pacer_file.filepath.save(
# We only care about the ext w/S3PrivateUUIDStorageTest
"docket_iquery_page.html",
"case_report.html",
ContentFile(text.encode()),
)

mark_pq_successful(pq, d_id=d.pk)
return {
"docket_pk": d.pk,
"content_updated": False,
"content_updated": content_updated,
}


Expand Down
4 changes: 2 additions & 2 deletions cl/recap/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,14 +287,14 @@ def test_ensure_no_users_in_response(self, mock):
j[bad_key]
mock.assert_called()

def test_uploading_a_docket_iquery_page(self, mock):
def test_uploading_a_case_query_page(self, mock):
"""Can we upload a docket iquery page and have it be saved correctly?
Note that this works fine even though we're not actually uploading a
docket due to the mock.
"""
self.data.update(
{"upload_type": UPLOAD_TYPE.IQUERY_PAGE, "document_number": ""}
{"upload_type": UPLOAD_TYPE.CASE_QUERY_PAGE, "document_number": ""}
)
del self.data["pacer_doc_id"]
r = self.client.post(self.path, self.data)
Expand Down

0 comments on commit 5aaf1fd

Please sign in to comment.