Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK/Components/PythonContainerOp - Switch from dict to ComponentSpec #396

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 24 additions & 23 deletions sdk/python/kfp/compiler/_component_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from google.cloud import storage
from pathlib import PurePath, Path
from .. import dsl
from ..components._components import _create_task_factory_from_component_dict
from ..components._components import _create_task_factory_from_component_spec
from ._k8s_helper import K8sHelper

class GCSHelper(object):
Expand Down Expand Up @@ -418,42 +418,43 @@ def _generate_pythonop(component_func, target_image, target_component_file=None)
The returned value is in fact a function, which should generates a container_op instance. """

from ..components._python_op import _python_function_name_to_component_name
from ..components import InputSpec, OutputSpec, ImplementationSpec, ContainerSpec, ComponentSpec


#Component name and description are derived from the function's name and docstribng, but can be overridden by @python_component function decorator
#The decorator can set the _component_human_name and _component_description attributes. getattr is needed to prevent error when these attributes do not exist.
component_name = getattr(component_func, '_component_human_name', None) or _python_function_name_to_component_name(component_func.__name__)
component_description = getattr(component_func, '_component_description', None) or (component_func.__doc__.strip() if component_func.__doc__ else None)

#TODO: Humanize the input/output names
input_names = inspect.getfullargspec(component_func)[0]

component_artifact = {}
component_artifact['name'] = component_name
component_artifact['description'] = component_description
component_artifact['outputs'] = [{'name': 'output'}]
component_artifact['inputs'] = []
component_artifact['implementation'] = {
'container': {
'image': target_image,
'arguments': [],
'fileOutputs': {
'output': '/output.txt'
}
}
}
for input in input_names:
component_artifact['inputs'].append({
'name': input,
'type': 'str'
})
component_artifact['implementation']['container']['arguments'].append({'value': input})
output_name = 'output'
output_file = '/output.txt' #TODO: change the output path to /outputs/output/file here and in code generator
component_spec = ComponentSpec(
name=component_name,
description=component_description,
inputs=[InputSpec(name=input_name, type='str') for input_name in input_names], #TODO: Chnage type to actual type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to change the type to actual type.
The component built already does the serialization, thus the types here are all strings.

outputs=[OutputSpec(name=output_name)],
implementation=ImplementationSpec(
container=ContainerSpec(
image=target_image,
#command=['python3', program_file], #TODO: Include the command line
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need the command right? The entrypoint is already built into the image. So remove the commented line?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Command line needs to be known for the operations to be manageable. I think we should promote this. In this particular case this would require small refactoring.

arguments=[{'value': input_name} for input_name in input_names],
file_outputs={ #TODO: Use proper output arguments (e.g. "{output: output_name}" ) instead of this workaround. Our 1st-party components should not be using the file_outputs workaround.
output_name: output_file,
}
)
)
)

target_component_file = target_component_file or getattr(component_func, '_component_target_component_file', None)
if target_component_file:
from ..components._yaml_utils import dump_yaml
component_text = dump_yaml(component_artifact)
component_text = dump_yaml(component_spec.to_struct())
Path(target_component_file).write_text(component_text)

return _create_task_factory_from_component_dict(component_artifact)
return _create_task_factory_from_component_spec(component_spec)

def build_python_component(component_func, target_image, base_image=None, dependency=[], staging_gcs_path=None, build_image=True, timeout=600, namespace='kubeflow', target_component_file=None):
""" build_component automatically builds a container image for the component_func
Expand Down