Skip to content

Commit

Permalink
Further big_data_passing fixes after PR kubeflow#166 (kubeflow#194)
Browse files Browse the repository at this point in the history
* further big_data_passing fixes after PR kubeflow#166

* further big_data_passing fixes after PR kubeflow#166

* address comments from reviewers

* Address comments from Tomcli
  • Loading branch information
fenglixa authored Jun 24, 2020
1 parent 196b822 commit 9267f1f
Show file tree
Hide file tree
Showing 7 changed files with 253 additions and 270 deletions.
4 changes: 4 additions & 0 deletions sdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,7 @@ the [FEATURES](FEATURES.md) doc.
- If you run into the error `bad interpreter: No such file or directory` when trying to use
python's venv, remove the current virtual environment in the `.venv` directory and
create a new one using `virtualenv .venv`

- For big data passing, users need to create the PV manually or enable dynamic volume provisioning; see https://kubernetes.io/docs/concepts/storage/dynamic-provisioning

Users need to create the PVC manually, with the PVC name the same as the pipelinerun name, until [issue #181](https://github.com/kubeflow/kfp-tekton/issues/181) is addressed.
220 changes: 78 additions & 142 deletions sdk/python/kfp_tekton/compiler/_data_passing_rewriter.py

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions sdk/python/tests/compiler/testdata/big_data_passing.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,5 +218,4 @@ def file_passing_pipelines():
if __name__ == '__main__':
from kfp_tekton.compiler import TektonCompiler
TektonCompiler().compile(file_passing_pipelines,
__file__.replace('.py', '.yaml'),
generate_pipelinerun=True)
__file__.replace('.py', '.yaml'))
200 changes: 100 additions & 100 deletions sdk/python/tests/compiler/testdata/big_data_passing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,14 @@ spec:
tasks:
- name: repeat-line
taskSpec:
results:
- description: /tmp/outputs/output_text/data
name: output_text
steps:
- args:
- --line
- Hello
- --count
- '5000'
- --output-text
- $(results.output-text.path)
- $(workspaces.repeat-line.path)/repeat-line-output_text
command:
- python3
- -u
Expand All @@ -62,22 +59,19 @@ spec:
\ pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: repeat-line
workspaces:
- name: repeat-line
workspace: file-passing-pipelines
- name: print-text
params:
- name: repeat-line-output_text
value: $(tasks.repeat-line.results.output_text)
runAfter:
- repeat-line
taskSpec:
artifacts:
- name: text
path: /tmp/inputs/text/data
raw:
data: $(inputs.params.repeat-line-output_text)
params:
- name: repeat-line-output_text
steps:
- args:
- --text
- /tmp/inputs/text/data
- $(workspaces.print-text.path)/repeat-line-output_text
command:
- python3
- -u
Expand All @@ -96,44 +90,52 @@ spec:
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: print-text
workspaces:
- name: print-text
workspace: file-passing-pipelines
- name: split-text-lines
taskSpec:
artifacts:
- name: source
path: /tmp/inputs/source/data
raw:
data: 'one
stepTemplate:
volumeMounts:
- mountPath: /tmp/inputs/source
name: source
steps:
- image: busybox
name: copy-inputs
script: '#!/bin/sh
two
set -exo pipefail
three
echo -n "one
four
two
five
three
six
four
seven
five
eight
six
nine
seven
ten'
results:
- description: /tmp/outputs/odd_lines/data
name: odd_lines
- description: /tmp/outputs/even_lines/data
name: even_lines
steps:
eight
nine
ten" > /tmp/inputs/source/data
'
- args:
- --source
- /tmp/inputs/source/data
- --odd-lines
- $(results.odd-lines.path)
- $(workspaces.split-text-lines.path)/split-text-lines-odd_lines
- --even-lines
- $(results.even-lines.path)
- $(workspaces.split-text-lines.path)/split-text-lines-even_lines
command:
- python3
- -u
Expand Down Expand Up @@ -163,22 +165,22 @@ spec:
\ pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
volumes:
- emptyDir: {}
name: source
workspaces:
- name: split-text-lines
workspaces:
- name: split-text-lines
workspace: file-passing-pipelines
- name: print-text-2
params:
- name: split-text-lines-odd_lines
value: $(tasks.split-text-lines.results.odd_lines)
runAfter:
- split-text-lines
taskSpec:
artifacts:
- name: text
path: /tmp/inputs/text/data
raw:
data: $(inputs.params.split-text-lines-odd_lines)
params:
- name: split-text-lines-odd_lines
steps:
- args:
- --text
- /tmp/inputs/text/data
- $(workspaces.print-text-2.path)/split-text-lines-odd_lines
command:
- python3
- -u
Expand All @@ -197,22 +199,19 @@ spec:
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: print-text-2
workspaces:
- name: print-text-2
workspace: file-passing-pipelines
- name: print-text-3
params:
- name: split-text-lines-even_lines
value: $(tasks.split-text-lines.results.even_lines)
runAfter:
- split-text-lines
taskSpec:
artifacts:
- name: text
path: /tmp/inputs/text/data
raw:
data: $(inputs.params.split-text-lines-even_lines)
params:
- name: split-text-lines-even_lines
steps:
- args:
- --text
- /tmp/inputs/text/data
- $(workspaces.print-text-3.path)/split-text-lines-even_lines
command:
- python3
- -u
Expand All @@ -231,17 +230,19 @@ spec:
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: print-text-3
workspaces:
- name: print-text-3
workspace: file-passing-pipelines
- name: write-numbers
taskSpec:
results:
- description: /tmp/outputs/numbers/data
name: numbers
steps:
- args:
- --count
- '100000'
- --numbers
- $(results.numbers.path)
- $(workspaces.write-numbers.path)/write-numbers-numbers
command:
- python3
- -u
Expand All @@ -264,22 +265,19 @@ spec:
\ pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: write-numbers
workspaces:
- name: write-numbers
workspace: file-passing-pipelines
- name: print-text-4
params:
- name: write-numbers-numbers
value: $(tasks.write-numbers.results.numbers)
runAfter:
- write-numbers
taskSpec:
artifacts:
- name: text
path: /tmp/inputs/text/data
raw:
data: $(inputs.params.write-numbers-numbers)
params:
- name: write-numbers-numbers
steps:
- args:
- --text
- /tmp/inputs/text/data
- $(workspaces.print-text-4.path)/write-numbers-numbers
command:
- python3
- -u
Expand All @@ -298,27 +296,21 @@ spec:
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: print-text-4
workspaces:
- name: print-text-4
workspace: file-passing-pipelines
- name: sum-numbers
params:
- name: write-numbers-numbers
value: $(tasks.write-numbers.results.numbers)
runAfter:
- write-numbers
taskSpec:
artifacts:
- name: numbers
path: /tmp/inputs/numbers/data
raw:
data: $(inputs.params.write-numbers-numbers)
params:
- name: write-numbers-numbers
results:
- description: /tmp/outputs/Output/data
name: output
steps:
- args:
- --numbers
- /tmp/inputs/numbers/data
- $(workspaces.sum-numbers.path)/write-numbers-numbers
- '----output-paths'
- $(results.output.path)
- $(workspaces.sum-numbers.path)/sum-numbers-output
command:
- python3
- -u
Expand All @@ -342,22 +334,19 @@ spec:
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: sum-numbers
workspaces:
- name: sum-numbers
workspace: file-passing-pipelines
- name: print-text-5
params:
- name: sum-numbers-output
value: $(tasks.sum-numbers.results.output)
runAfter:
- sum-numbers
taskSpec:
artifacts:
- name: text
path: /tmp/inputs/text/data
raw:
data: $(inputs.params.sum-numbers-output)
params:
- name: sum-numbers-output
steps:
- args:
- --text
- /tmp/inputs/text/data
- $(workspaces.print-text-5.path)/sum-numbers-output
command:
- python3
- -u
Expand All @@ -376,6 +365,11 @@ spec:
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: print-text-5
workspaces:
- name: print-text-5
workspace: file-passing-pipelines
- name: gen-params
taskSpec:
results:
Expand Down Expand Up @@ -432,3 +426,9 @@ spec:
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
image: tensorflow/tensorflow:1.13.2-py3
name: main
workspaces:
- name: file-passing-pipelines
workspaces:
- name: file-passing-pipelines
persistentVolumeClaim:
claimName: file-passing-pipelines
Loading

0 comments on commit 9267f1f

Please sign in to comment.