Skip to content

Commit

Permalink
Initial setup (#1)
Browse files Browse the repository at this point in the history
* Add skeleton docs

* initial setup

* initial setup

* initial setup

* initial setup

* initial setup

* initial setup

* initial setup

* initial setup
  • Loading branch information
kstathou authored Dec 2, 2023
1 parent 39e3f02 commit adfc781
Show file tree
Hide file tree
Showing 16 changed files with 4,245 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Example environment file: replace each <mykey> placeholder with your own key.
OPENAI_API_KEY=<mykey>
WANDB_API_KEY=<mykey>
10 changes: 10 additions & 0 deletions .github/actions/poetry/pre-commit-lint/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Composite action that runs every configured pre-commit hook against all files.
name: Run Lint
description: Run linters to verify code quality. Credits to Andre Sionek for writing the original GitHub Action.

runs:
  using: "composite"
  steps:
    # Invokes pre-commit through Poetry, so Poetry and the project's dependencies must already be installed
    - name: Lint
      shell: bash
      run: poetry run pre-commit run --all-files
42 changes: 42 additions & 0 deletions .github/actions/poetry/setup/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Composite action that installs Python and Poetry, then installs the project's dependencies.
name: Set Up Poetry Environment
description: Install Python and Poetry. Credits to Andre Sionek for writing the original GitHub Action.

# NOTE(review): `type` does not appear among the documented keys for action inputs - confirm it is still wanted.
inputs:
  python_version:
    description: Python version
    required: true
    default: '3.11.5'
    type: string

  poetry_version:
    description: Poetry version
    required: true
    # Quoted so the version is always read as a string, like python_version
    default: '1.5.1'
    type: string

runs:
  using: "composite"
  steps:

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python_version }}

    - name: Install and configure Poetry
      uses: snok/install-poetry@v1
      with:
        virtualenvs-create: true
        virtualenvs-in-project: true
        version: ${{ inputs.poetry_version }}

    # Second setup-python pass, run after Poetry is installed, that turns on the Poetry cache
    - name: Set up Poetry cache
      uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python_version }}
        cache: poetry

    - name: Install Python Dependencies with Poetry
      shell: bash
      run: poetry install
31 changes: 31 additions & 0 deletions .github/workflows/status-checks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
# Runs the lint checks on every push to a branch other than main.
name: Status Checks

on:
  push:
    branches-ignore:
      - main

# Stops the currently running workflow if a new one has been triggered.
# The workflow name is part of the group so that other workflows on the same ref are not cancelled.
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true

jobs:
  lint:
    name: Lint
    # The ubuntu-20.04 hosted runner image has been retired, so a supported image is used instead
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v3

      - name: Set up
        uses: ./.github/actions/poetry/setup
        with:
          python_version: "3.11.5"

      # Adds the lint dependency group on top of what the setup action installed
      - name: Install Python Lint Dependencies with Poetry
        shell: bash
        run: poetry install --with lint

      - name: Lint
        uses: ./.github/actions/poetry/pre-commit-lint
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST

.DS_Store
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
Expand Down
72 changes: 72 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
---
# Stop at the first failing hook instead of running the remaining ones.
fail_fast: true

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.3.0
    hooks:
      - id: check-toml
        stages: [commit]

      - id: end-of-file-fixer
        stages: [commit]

      - id: trailing-whitespace
        stages: [commit]

      - id: check-yaml
        stages: [commit]

      - id: detect-private-key
        stages: [commit]

      - id: check-executables-have-shebangs
        stages: [commit]

      - id: check-shebang-scripts-are-executable
        stages: [commit]

  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.1.6
    hooks:
      # Run the linter.
      - id: ruff
        args: [--fix]
      # Run the formatter.
      - id: ruff-format

  # Local hooks are executed through the project's Poetry environment (`language: system`).
  - repo: local
    hooks:
      - id: isort
        name: Run Isort
        entry: poetry run isort
        language: system
        types: [file, python]
        stages: [commit]

      - id: yamllint
        name: Run Yamllint
        entry: poetry run yamllint
        language: system
        types: [file, yaml]
        stages: [commit]

      - id: bandit
        name: Run Bandit
        entry: poetry run bandit
        language: system
        types: [file, python]
        args:
          - --configfile
          - pyproject.toml
          - --severity-level
          - all
          - --confidence-level
          - all
          - --quiet
          - --format
          - custom
        stages: [commit]
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "basic"
}
31 changes: 31 additions & 0 deletions .yamllint
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
# yamllint configuration: starts from the default rule set and overrides the rules listed below.
extends: default

rules:
  braces:
    level: error
    max-spaces-inside: 1
  brackets:
    level: error
    max-spaces-inside: 1
  colons:
    level: error
  commas:
    level: error
  empty-lines:
    level: error
  hyphens:
    level: error
  document-start: disable
  indentation:
    level: error
    indent-sequences: consistent
  new-lines:
    level: warning
  trailing-spaces:
    level: warning
  line-length:
    max: 159
    level: error
    allow-non-breakable-inline-mappings: true
  truthy: disable
32 changes: 32 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Run every recipe with bash.
SHELL=/bin/bash

# Running `make` without a target runs `help`.
.DEFAULT_GOAL := help

# Prints every target that carries a trailing "## description", sorted by name.
.PHONY: help
help: ## Shows this help text
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

# The "## ..." suffix makes this target show up in `make help` like the other targets.
.PHONY: init
init: clean install ## Rebuilds the environment from scratch (clean, then install)

# bash runs recipes without globstar, so `**/` only matches one directory level;
# `*.egg-info` is listed as well to catch egg-info directories in the project root.
.PHONY: clean
clean: ## Removes the project virtual env, build artifacts and tool caches
	rm -rf .venv build dist *.egg-info **/*.egg-info .pytest_cache node_modules .coverage

# Installs the dependencies (including the lint group), registers the git hooks and
# regenerates requirements.txt from the Poetry lock data.
.PHONY: install
install: ## Install the project dependencies and pre-commit using Poetry.
	poetry install --with lint
	poetry run pre-commit install --hook-type pre-commit --hook-type commit-msg --hook-type pre-push
	# pipefail makes the target fail when `poetry export` fails instead of leaving a truncated file behind;
	# awk keeps only the part of each requirement before the first ';' (drops environment markers)
	set -o pipefail; poetry export -f requirements.txt --without-hashes | awk -F ';' '{print $$1}' > requirements.txt

# .PHONY: test
# test: ## Run tests
# poetry run python -m pytest

# Runs every configured pre-commit hook against the whole repository.
.PHONY: lint
lint: ## Apply linters to all files
	poetry run pre-commit run --all-files

# Deletes every poetry.lock file below the current directory, skipping anything inside ./.venv.
.PHONY: clean-poetry-lock
clean-poetry-lock: ## Removes poetry.lock from all folders except .venv
	find . -name poetry.lock -type f -not -path "./.venv/*" -delete
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,24 @@
# llm-stack
End-to-end tech stack for the LLM data flywheel

End-to-end tech stack for the LLM data flywheel.

## Chapters

- Building your training set with GPT-4
- Fine-tuning an open-source LLM
- Evaluation
- Human feedback
- Unit tests
- Deployment

## Installation

TODO

## Fine-tuning

### Data

## Contributing

TODO
Loading

0 comments on commit adfc781

Please sign in to comment.