Skip to content

Commit

Permalink
Merge pull request #20 from SchmidtDSE/switch-to-ml
Browse files Browse the repository at this point in the history
Switch to ML
  • Loading branch information
sampottinger authored Sep 15, 2023
2 parents 352bfd6 + 6a0d3ea commit 2b717ec
Show file tree
Hide file tree
Showing 122 changed files with 28,689 additions and 2,363 deletions.
54 changes: 24 additions & 30 deletions .github/workflows/pipelines.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,38 @@ name: Pipeline
on: [push]

jobs:
rpipeline:
sqlpipeline:
runs-on: ubuntu-latest
name: Pipeline
name: SQL Pipeline
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install Sqlite
run: sudo apt install sqlite3
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Load R
uses: r-lib/actions/setup-r@v2
- name: Checkout
uses: actions/checkout@v3
- name: Install dependencies
uses: r-lib/actions/setup-r-dependencies@v2
- name: Prepare R pipeline
run: cd r; bash transform.sh
- name: Run R pipeline
run: cd r; Rscript pipeline.R
- name: Run SQL pipeline
run: cd sql; bash make.sh
- name: Run output checks
run: python python/check_output.py output/overview.csv
python-version: '3.11'
- name: Install other dependencies
run: pip install -r requirements.txt
- name: Execute
run: bash build.sh
- uses: actions/upload-artifact@v3
with:
name: Pipeline Result
path: output
deploy:
runs-on: ubuntu-latest
environment: deploy
name: Deploy
if: github.ref == 'refs/heads/main'
needs: [rpipeline]
steps:
- name: Trigger
uses: peter-evans/repository-dispatch@v2
with:
token: ${{ secrets.CROSS_TOKEN }}
repository: SchmidtDSE/plastics-prototype
event-type: data-update
- name: Check output
run: bash confirm_output.sh
# deploy:
# runs-on: ubuntu-latest
# environment: deploy
# name: Deploy
# if: github.ref == 'refs/heads/main'
# needs: [rpipeline, sqlpipeline]
# steps:
# - name: Trigger
# uses: peter-evans/repository-dispatch@v2
# with:
# token: ${{ secrets.CROSS_TOKEN }}
# repository: SchmidtDSE/plastics-prototype
# event-type: data-update
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ output/*.db
output/*.csv
output/*.db
data_workspace/
task
.gitpod.yml
r/pipeline.R
r/pipeline.R
__pycache__
41 changes: 0 additions & 41 deletions DESCRIPTION

This file was deleted.

Empty file added __init__.py
Empty file.
1 change: 1 addition & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Run the luigi ExportTask from the "tasks" module, using luigi's local
# scheduler and a single worker.
python -m luigi --module tasks ExportTask --local-scheduler --workers 1
44 changes: 30 additions & 14 deletions python/check_output.py → check_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@
YEARS_REQUIRED = set(range(2010, 2050))


class CheckResult:
    """Outcome of an output check: a success flag plus a message."""

    def __init__(self, successful, message):
        """Store the success flag and the message describing the outcome."""
        self._message = message
        self._successful = successful

    def get_message(self):
        """Return the message supplied at construction."""
        return self._message

    def get_successful(self):
        """Return the success flag supplied at construction."""
        return self._successful


def has_attrs(rows):
row = rows[0]
missing_attrs = filter(lambda x: x not in row, ATTRS_EXPECTED)
Expand All @@ -56,30 +69,33 @@ def has_years(rows):
return True


def main():
if len(sys.argv) != NUM_ARGS + 1:
print(USAGE_STR)
sys.exit(1)

loc = sys.argv[1]

def check(loc):
with open(loc) as f:
rows = list(csv.DictReader(f))

if not has_attrs(rows):
print('Columns missing.')
sys.exit(1)
return CheckResult(False, 'Columns missing.')

if not has_regions(rows):
print('Unexpected regions.')
sys.exit(1)
return CheckResult(False, 'Unexpected regions.')

if not has_years(rows):
print('Unexpected years.')
return CheckResult(False, 'Unexpected years.')

return CheckResult(True, 'Passed checks.')


def main():
if len(sys.argv) != NUM_ARGS + 1:
print(USAGE_STR)
sys.exit(1)

print('Passed checks.')
sys.exit(0)
loc = sys.argv[1]

result = check(loc)

if not result.get_successful():
raise RuntimeError('Failed with: ' + result.get_message())


if __name__ == '__main__':
Expand Down
47 changes: 47 additions & 0 deletions check_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import csv
import sys

# Usage hint printed by main() when the argument count is wrong. It names
# this script (check_summary.py) so the command can be copied verbatim.
USAGE_STR = 'python check_summary.py [file]'
# Number of command-line arguments expected after the script name.
NUM_ARGS = 1


# Columns every summary CSV must provide. The spelling
# 'consumptionTransporationMT' (sic) is kept byte-for-byte; do not correct
# it here in isolation.
_REQUIRED_SUMMARY_FIELDS = (
    'year',
    'region',
    'inputProduceFiberMT',
    'inputProduceResinMT',
    'inputImportResinMT',
    'inputImportArticlesMT',
    'inputImportGoodsMT',
    'inputImportFiberMT',
    'inputAdditivesMT',
    'consumptionAgricultureMT',
    'consumptionConstructionMT',
    'consumptionElectronicMT',
    'consumptionHouseholdLeisureSportsMT',
    'consumptionPackagingMT',
    'consumptionTransporationMT',
    'consumptionTextileMT',
    'consumptionOtherMT',
    'eolRecyclingPercent',
    'eolIncinerationPercent',
    'eolLandfillPercent',
    'eolMismanagedPercent',
)

# The summary must contain strictly more than this many data rows.
_MIN_SUMMARY_ROWS = 50


def check(loc):
    """Validate the columns and size of the summary CSV at loc.

    Args:
        loc: Path to the CSV file to validate.

    Raises:
        AssertionError: If the file has no data rows, lacks a required
            column, or has 50 or fewer data rows.
    """
    # newline='' is what the csv module asks for when handed a file object.
    with open(loc, newline='') as f:
        results = list(csv.DictReader(f))

    # Failures are raised explicitly instead of via ``assert`` statements so
    # the validation still runs under ``python -O``, which strips asserts.
    if not results:
        raise AssertionError('No data rows found in ' + str(loc))

    row = results[0]
    missing = [name for name in _REQUIRED_SUMMARY_FIELDS if name not in row]
    if missing:
        raise AssertionError('Missing columns: ' + ', '.join(missing))

    if len(results) <= _MIN_SUMMARY_ROWS:
        raise AssertionError(
            'Expected more than {} rows but found {}'.format(
                _MIN_SUMMARY_ROWS,
                len(results)
            )
        )


def main():
    """Command-line entry point: check the file named by the sole argument."""
    expected_argc = NUM_ARGS + 1
    if len(sys.argv) == expected_argc:
        check(sys.argv[1])
        return

    # Wrong number of arguments: show the usage hint and fail.
    print(USAGE_STR)
    sys.exit(1)


if __name__ == '__main__':
main()
5 changes: 5 additions & 0 deletions clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Reset generated state: remove the workspace, output and task directories,
# then recreate task/ seeded with the job template.
# -f lets the script succeed quietly when a directory does not exist yet, so
# it can be run repeatedly.
rm -rf data_workspace
rm -rf output
rm -rf task
mkdir -p task
cp job_template.json task/job.json
44 changes: 44 additions & 0 deletions clean_filenames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
import re
import sys

USAGE_STR = 'python clean_filenames.py [directory]'
NUM_ARGS = 1


def clean_name(target_path):
    """Return target_path with its final component normalized.

    The final path component is lowercased and every character other than
    ASCII letters, digits and '.' is removed. Leading directories are left
    untouched.

    Args:
        target_path: Path whose last component should be cleaned.

    Returns:
        The path with the cleaned final component.
    """
    # os.path.split honors the platform separator instead of assuming '/'.
    directory, filename = os.path.split(target_path)
    # '.' needs no escaping inside a character class; writing r'\\.' there
    # would additionally whitelist literal backslashes.
    filename_clean = re.sub(r'[^A-Za-z0-9.]', '', filename).lower()
    return os.path.join(directory, filename_clean)


def execute(directory_path):
    """Rename the files directly inside directory_path to their clean names.

    Only regular files are considered; sub-directories are not descended
    into. Files whose name is already clean are left untouched.

    Args:
        directory_path: Directory whose files should be renamed.

    Raises:
        FileExistsError: If a rename would land on another existing file or
            on the target of another planned rename. Nothing is renamed in
            that case.
    """
    planned = {}
    for entry in os.listdir(directory_path):
        prior = os.path.join(directory_path, entry)
        if not os.path.isfile(prior):
            continue  # Directories and other non-files are left alone.

        new = clean_name(prior)
        if new == prior:
            continue  # Already clean; nothing to rename.

        # os.rename silently replaces an existing destination on POSIX, so
        # collisions are rejected up front rather than losing a file.
        clash = planned.get(new)
        if clash is None and os.path.exists(new):
            # samefile covers case-insensitive filesystems, where the clean
            # name can resolve to the very file being renamed.
            if not os.path.samefile(prior, new):
                clash = new
        if clash is not None:
            raise FileExistsError(
                'Cannot rename {!r} to {!r}: collides with {!r}'.format(
                    prior,
                    new,
                    clash
                )
            )
        planned[new] = prior

    for new, prior in planned.items():
        os.rename(prior, new)


def main():
    """Command-line entry point: clean filenames in the given directory."""
    supplied_args = len(sys.argv) - 1
    if supplied_args != NUM_ARGS:
        # Wrong number of arguments: show the usage hint and fail.
        print(USAGE_STR)
        sys.exit(1)

    execute(sys.argv[1])


if __name__ == '__main__':
main()
3 changes: 3 additions & 0 deletions confirm_output.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Confirm that each expected output file exists. The exit status identifies
# the first missing file: 1 = ml, 2 = curve, 3 = naive.
if [ ! -f "output/overview_ml.csv" ]; then
    exit 1
fi
if [ ! -f "output/overview_curve.csv" ]; then
    exit 2
fi
if [ ! -f "output/overview_naive.csv" ]; then
    exit 3
fi
73 changes: 73 additions & 0 deletions const.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
import pathlib


# Absolute directory containing this module; the other paths hang off it.
PARENT_DIR = pathlib.Path(__file__).parent.absolute()
# Plain-string paths of the 'task' and 'sql' directories beside this module.
DEFAULT_TASK_DIR = str(PARENT_DIR / 'task')
SQL_DIR = str(PARENT_DIR / 'sql')

# Ordered field names, laid out below by shared name prefix.
# NOTE: 'consumptionTransporationMT' (sic) is the spelling used throughout;
# keep it as is.
PREPROC_FIELD_NAMES = [
    # Record identity.
    'year', 'region',
    # input* fields.
    'inputProduceFiberMT', 'inputProduceResinMT',
    'inputImportResinMT', 'inputImportArticlesMT',
    'inputImportGoodsMT', 'inputImportFiberMT',
    'inputAdditivesMT',
    # netImport* fields.
    'netImportArticlesMT', 'netImportFibersMT',
    'netImportGoodsMT', 'netImportResinMT',
    # consumption* fields.
    'consumptionAgricultureMT', 'consumptionConstructionMT',
    'consumptionElectronicMT', 'consumptionHouseholdLeisureSportsMT',
    'consumptionPackagingMT', 'consumptionTransporationMT',
    'consumptionTextileMT', 'consumptionOtherMT',
    # eol*Percent fields.
    'eolRecyclingPercent', 'eolIncinerationPercent',
    'eolLandfillPercent', 'eolMismanagedPercent',
]

# Ordered field names: the year/region identity pair plus population and gdp.
AUX_FIELD_NAMES = ['year', 'region', 'population', 'gdp']

# Mean and standard deviation keyed by consumption field name.
# NOTE(review): units are not stated here (presumably years) - confirm.
LIFECYCLE_DISTRIBUTIONS = {
    field: {'mean': mean, 'std': std}
    for field, mean, std in (
        ('consumptionAgricultureMT', 2, 1),
        ('consumptionConstructionMT', 35, 7),
        ('consumptionElectronicMT', 8, 2),
        ('consumptionHouseholdLeisureSportsMT', 3, 1),
        ('consumptionPackagingMT', 0.5, 0.1),
        ('consumptionTransporationMT', 13, 3),
        ('consumptionTextileMT', 5, 1.5),
        ('consumptionOtherMT', 5, 1.5),
    )
}

# Ordered field names, laid out below by shared name prefix.
# NOTE: 'consumptionTransporationMT' (sic) is the spelling used throughout;
# keep it as is.
EXPORT_FIELD_NAMES = [
    # Record identity.
    'year', 'region',
    # eol*MT fields.
    'eolRecyclingMT', 'eolLandfillMT',
    'eolIncinerationMT', 'eolMismanagedMT',
    # consumption* fields.
    'consumptionAgricultureMT', 'consumptionConstructionMT',
    'consumptionElectronicMT', 'consumptionHouseholdLeisureSportsMT',
    'consumptionPackagingMT', 'consumptionTransporationMT',
    'consumptionTextileMT', 'consumptionOtherMT',
    # Trade and production fields.
    'netImportsMT', 'netExportsMT', 'domesticProductionMT',
]
File renamed without changes.
Binary file removed data/02_Data_Raw/.DS_Store
Binary file not shown.
9 changes: 0 additions & 9 deletions data/02_Data_Raw/08.1_China_End_Use_and_Type.csv

This file was deleted.

9 changes: 0 additions & 9 deletions data/02_Data_Raw/09.1_NAFTA_End_Use_and_Type.csv

This file was deleted.

Loading

0 comments on commit 2b717ec

Please sign in to comment.