Commit 37bf10a
Mod example to use run_with_services
ppinchuk committed Jul 3, 2024
1 parent dd4a876 commit 37bf10a
Showing 1 changed file with 17 additions and 24 deletions.
41 changes: 17 additions & 24 deletions examples/ordinance_gpt/parse_pdf.py
@@ -11,34 +11,13 @@
 from elm.ords.llm import LLMCaller
 from elm.ords.extraction.ordinance import OrdinanceExtractor
 from elm.ords.extraction.apply import extract_ordinance_values
-from elm.ords.services.provider import RunningAsyncServices
+from elm.ords.services.provider import run_with_services
 from rex import init_logger
 
 from elm.ords.extraction.apply import (check_for_ordinance_info,
                                        extract_ordinance_text_with_llm)
 
 
-async def clean_text(services, doc, **kwargs):
-    text_splitter = RecursiveCharacterTextSplitter(
-        RTS_SEPARATORS,
-        chunk_size=3000,
-        chunk_overlap=300,
-        length_function=partial(ApiBase.count_tokens, model='gpt-4'),
-    )
-    async with RunningAsyncServices(services):
-        extractor = OrdinanceExtractor(LLMCaller(**kwargs))
-        doc = await check_for_ordinance_info(doc, text_splitter, **kwargs)
-        doc = await extract_ordinance_text_with_llm(doc, text_splitter,
-                                                    extractor)
-    return doc
-
-
-async def extract_ordinances(services, doc, **kwargs):
-    async with RunningAsyncServices(services):
-        doc = await extract_ordinance_values(doc, **kwargs)
-    return doc
-
-
 if __name__ == '__main__':
     init_logger('elm', log_level='INFO')
 
@@ -50,6 +29,13 @@ async def extract_ordinances(services, doc, **kwargs):
 
     doc = PDFDocument.from_file(fp_pdf)
 
+    text_splitter = RecursiveCharacterTextSplitter(
+        RTS_SEPARATORS,
+        chunk_size=3000,
+        chunk_overlap=300,
+        length_function=partial(ApiBase.count_tokens, model='gpt-4'),
+    )
+
     azure_api_key, azure_version, azure_endpoint = validate_api_params()
     client = openai.AsyncAzureOpenAI(api_key=azure_api_key,
                                      api_version=azure_version,
@@ -58,9 +44,16 @@ async def extract_ordinances(services, doc, **kwargs):
     initialize_service_queue(llm_service.__class__.__name__)
     services = [llm_service]
     kwargs = dict(llm_service=llm_service, model='gpt-4', temperature=0)
+    extractor = OrdinanceExtractor(LLMCaller(**kwargs))
+
+    check_for_ords = check_for_ordinance_info(doc, text_splitter, **kwargs)
+    doc = run_with_services(services, check_for_ords)
+
+    extract_text = extract_ordinance_text_with_llm(doc, text_splitter,
+                                                   extractor)
+    doc = run_with_services(services, extract_text)
 
-    doc = asyncio.run(clean_text(services, doc, **kwargs))
-    doc = asyncio.run(extract_ordinances(services, doc, **kwargs))
+    doc = run_with_services(services, extract_ordinance_values(doc, **kwargs))
 
     doc.metadata['ordinance_values'].to_csv(fp_ords)
     with open(fp_txt_all, 'w') as f:
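In short, the commit drops the per-step async wrapper functions (clean_text, extract_ordinances), which entered a RunningAsyncServices context and were driven with asyncio.run, and instead hands each un-awaited coroutine to run_with_services. A minimal sketch of the before/after call pattern for one step, based only on what the diff shows; the _old_style wrapper name is illustrative, imports and setup are as in the example script:

    # Before: wrap the step in an async function, enter the service
    # context by hand, and drive it with asyncio.run.
    async def _old_style(services, doc, **kwargs):
        async with RunningAsyncServices(services):
            doc = await extract_ordinance_values(doc, **kwargs)
        return doc

    doc = asyncio.run(_old_style(services, doc, **kwargs))

    # After: pass the un-awaited coroutine to the helper, which (per the
    # diff) runs it against the services and returns the updated document.
    doc = run_with_services(services, extract_ordinance_values(doc, **kwargs))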
