Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: adding llm example #4023

Merged
merged 12 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions examples/quick-start-llm-python/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
_venv
.env

node_modules

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
node_modules/
/test-results/
/playwright-report/
/blob-report/
/playwright/.cache/
1 change: 1 addition & 0 deletions examples/quick-start-llm-python/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
37 changes: 37 additions & 0 deletions examples/quick-start-llm-python/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
help: Makefile ## show list of commands
@echo "Choose a command to run:"
@echo ""
@awk 'BEGIN {FS = ":.*?## "} /[a-zA-Z_-]+:.*?## / {sub("\\\\n",sprintf("\n%22c"," "), $$2);printf "\033[36m%-40s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) | sort

# Instrumentations for providers this example does not use; skipping them
# silences opentelemetry-instrument startup warnings.
DISABLED_INSTRUMENTATIONS=aleph_alpha_client,chromadb,cohere,groq,haystack-ai,lancedb,llama-index,marqo,milvus,mistralai,pinecone_client,qdrant_client,replicate,together,google_cloud_aiplatform,ibm-watson-machine-learning,weaviate_client

build/docker: ## build images used by docker compose file
@docker compose build

start/on-docker: build/docker ## build and run the entire app using docker compose
@docker compose up

start/on-docker/only-observability: ## run only the observability stack (otel-collector + jaeger) detached via docker compose
@docker compose up -d otel-collector jaeger

start/local-ui: start/on-docker/only-observability ## run the Streamlit UI app locally (observability stack on docker)
@OTEL_SERVICE_NAME=quick-start-llm \
OTEL_TRACES_EXPORTER=otlp \
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:4317 \
OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=$(DISABLED_INSTRUMENTATIONS) \
opentelemetry-instrument streamlit run ./app/streamlit_app.py

start/local-api: start/on-docker/only-observability ## run the Flask API app locally (observability stack on docker)
@OTEL_SERVICE_NAME=quick-start-llm \
OTEL_TRACES_EXPORTER=otlp \
OTEL_METRICS_EXPORTER=none \
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:4317 \
OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=$(DISABLED_INSTRUMENTATIONS) \
opentelemetry-instrument python ./app/flask_app.py

stop: ## stop all running containers
@docker compose down

test: ## run e2e tests
@tracetest run -f ./tests/run-gemini.yaml
55 changes: 55 additions & 0 deletions examples/quick-start-llm-python/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
## Quick Start LLM app

This is an example of a simple LLM app that uses the `langchain` library to summarize the content of a URL, based on [this example](https://github.com/alphasecio/langchain-examples/tree/main/url-summary)

### Running example with docker

```bash
make start/on-docker
```

### Running the example locally

#### Setting up the environment

```bash

# create venv
python -m venv ./_venv

# activate env
source _venv/bin/activate

# install requirements
pip install -r app/requirements.txt

# install OTel auto-instrumentation
opentelemetry-bootstrap -a install

# add openai api key
echo "OPENAI_API_KEY={your-open-ai-api-key}" >> .env
# add google gemini api key
echo "GOOGLE_API_KEY={your-google-gemini-api-key}" >> .env

# add tracetest agent keys
echo "TRACETEST_API_KEY={your-tracetest-api-key}" >> .env
echo "TRACETEST_ENVIRONMENT_ID={your-tracetest-env-id}" >> .env

# add tracetest token for playwright tests with TS lib
echo "TRACETEST_API_TOKEN={your-tracetest-token-for-ts-libs}" >> ./tests/.env
```

#### Running the apps

```bash

# start the app running the UI locally
make start/local-ui

# start the app running the API locally
make start/local-api

# start the app with everything running on docker
make start/on-docker

```
9 changes: 9 additions & 0 deletions examples/quick-start-llm-python/app/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Slim Python base image, pinned for reproducible builds.
FROM python:3.12.4-slim

WORKDIR /opt/app
# Copy only the requirements first so the dependency layers are cached
# independently of application-code changes.
COPY requirements.txt ./

RUN pip install --no-cache-dir -r requirements.txt
# Install OpenTelemetry auto-instrumentation packages matching the
# libraries installed above.
RUN opentelemetry-bootstrap -a install

# Copy the application source last (invalidates only the final layer).
COPY . .
3 changes: 3 additions & 0 deletions examples/quick-start-llm-python/app/example.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Born in London, Turing was raised in southern England. He graduated from King's College, Cambridge, and in 1938, earned a doctorate degree from Princeton University. During World War II, Turing worked for the Government Code and Cypher School at Bletchley Park, Britain's codebreaking centre that produced Ultra intelligence. He led Hut 8, the section responsible for German naval cryptanalysis. Turing devised techniques for speeding the breaking of German ciphers, including improvements to the pre-war Polish bomba method, an electromechanical machine that could find settings for the Enigma machine. He played a crucial role in cracking intercepted messages that enabled the Allies to defeat the Axis powers in many crucial engagements, including the Battle of the Atlantic.

After the war, Turing worked at the National Physical Laboratory, where he designed the Automatic Computing Engine, one of the first designs for a stored-program computer. In 1948, Turing joined Max Newman's Computing Machine Laboratory at the Victoria University of Manchester, where he helped develop the Manchester computers[12] and became interested in mathematical biology. Turing wrote on the chemical basis of morphogenesis and predicted oscillating chemical reactions such as the Belousov–Zhabotinsky reaction, first observed in the 1960s. Despite these accomplishments, he was never fully recognised during his lifetime because much of his work was covered by the Official Secrets Act.
54 changes: 54 additions & 0 deletions examples/quick-start-llm-python/app/flask_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from dotenv import load_dotenv

# Load environment variables (API keys, Tracetest config) from the .env file
# before anything else reads os.environ.
load_dotenv()

# Initialize telemetry.
# NOTE: the imports below are intentionally placed AFTER telemetry_init() —
# non-PEP8, but required so module-level activity in those modules is traced.
from telemetry import init as telemetry_init
tracer = telemetry_init() # run telemetry.init() before loading any other modules to capture any module-level telemetry

from opentelemetry import trace
from opentelemetry.instrumentation.flask import FlaskInstrumentor

# Optional heartbeat span emitter, kept here for debugging:
# from telemetry import heartbeat as telemetry_heartbeat
# telemetry_heartbeat(tracer)

from llm.providers import get_provider, get_providers
from flask import Flask, request, jsonify, make_response

# Auto-instrument Flask so every incoming request produces a server span.
instrumentor = FlaskInstrumentor()

app = Flask(__name__)
instrumentor.instrument_app(app)

# Port the API listens on (app.run accepts the string form).
api_port = '8800'

@app.route('/summarizeText', methods=['POST'])
def summarize_text():
    """Summarize posted text with the requested LLM provider.

    Expects a JSON body: {"provider": <provider key>, "text": <source text>}.
    Returns JSON {"summary": ..., "trace_id": ...} on success, or a 400
    response with an "error" payload when the request is invalid.
    """
    # request.json is None when the body is absent/not JSON; normalize to a dict
    # so field validation below returns a 400 instead of raising.
    data = request.json or {}

    provider_type = data.get('provider')

    providers = get_providers()
    if provider_type not in providers:
        return make_response(jsonify({ "error": "Invalid provider" }), 400)

    source_text = data.get('text')
    if source_text is None:
        # Previously a missing 'text' key raised KeyError (HTTP 500).
        return make_response(jsonify({ "error": "Missing 'text' field" }), 400)

    provider = get_provider(provider_type)
    # Renamed from `summarize_text` to avoid shadowing this view function.
    summary = provider.summarize(source_text)

    # Get trace ID from current span so clients (e.g. Tracetest) can correlate
    # this request with its distributed trace.
    span = trace.get_current_span()
    trace_id = span.get_span_context().trace_id

    # Convert trace_id to a 32-char zero-padded hex string (W3C trace id format)
    trace_id_hex = format(trace_id, '032x')

    return jsonify({"summary": summary, "trace_id": trace_id_hex})

if __name__ == '__main__':
    # Bind to all interfaces so the API is reachable from other containers.
    print(f'Running on port: {api_port}')
    app.run(host='0.0.0.0', port=api_port)
23 changes: 23 additions & 0 deletions examples/quick-start-llm-python/app/llm/provider_google_gemini.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from langchain.chains.summarize import load_summarize_chain
from langchain_community.docstore.document import Document
from langchain_text_splitters import CharacterTextSplitter

from langchain_google_genai import ChatGoogleGenerativeAI

class GoogleGeminiProvider:
    """LLM provider backed by Google's Gemini model via LangChain."""

    def provider_name(self):
        """Human-readable name shown in the provider selector."""
        return "Google (Gemini)"

    def summarize(self, text):
        """Summarize ``text`` with Gemini using a map-reduce chain."""
        llm = ChatGoogleGenerativeAI(model="gemini-pro")

        # Chunk the source text so each piece fits the model context window.
        splitter = CharacterTextSplitter()
        chunks = splitter.split_text(text)

        # Wrap at most the first three chunks as LangChain documents.
        documents = [Document(page_content=chunk) for chunk in chunks[:3]]

        # NOTE(review): Chain.run is deprecated in langchain >= 0.1 in favor
        # of invoke(); kept as-is to match the pinned dependency version.
        summarize_chain = load_summarize_chain(llm, chain_type="map_reduce")
        return summarize_chain.run(documents)
Loading
Loading