Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK - Refactored _func_to_component_spec to split code generation from signature analysis #1334

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 54 additions & 54 deletions sdk/python/kfp/components/_python_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ._components import _create_task_factory_from_component_spec
from ._structures import *

import inspect
from pathlib import Path
from typing import TypeVar, Generic, List

Expand Down Expand Up @@ -85,40 +86,13 @@ def _capture_function_code_using_cloudpickle(func, modules_to_capture: List[str]
return '\n'.join(code_lines)


def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None) -> ComponentSpec:
'''Takes a self-contained python function and converts it to component

Args:
func: Required. The function to be converted
base_image: Optional. Docker image to be used as a base image for the python component. Must have python 3.5+ installed. Default is tensorflow/tensorflow:1.11.0-py3
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Python source code that gets placed before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
'''
decorator_base_image = getattr(func, '_component_base_image', None)
if decorator_base_image is not None:
if base_image is not _default_base_image and decorator_base_image != base_image:
raise ValueError('base_image ({}) conflicts with the decorator-specified base image metadata ({})'.format(base_image, decorator_base_image))
else:
base_image = decorator_base_image
else:
if base_image is None:
raise ValueError('base_image cannot be None')

import inspect
import re
from collections import OrderedDict

def _extract_component_interface(func) -> ComponentSpec:
single_output_name_const = 'Output'
single_output_pythonic_name_const = 'output'

signature = inspect.signature(func)
parameters = list(signature.parameters.values())
parameter_to_type_name = OrderedDict()
inputs = []
outputs = []
extra_output_names = []
arguments = []

def annotation_to_type_struct(annotation):
if not annotation or annotation == inspect.Parameter.empty:
Expand All @@ -130,9 +104,7 @@ def annotation_to_type_struct(annotation):

for parameter in parameters:
type_struct = annotation_to_type_struct(parameter.annotation)
parameter_to_type_name[parameter.name] = str(type_struct)
#TODO: Humanize the input/output names
arguments.append(InputValuePlaceholder(parameter.name))

input_spec = InputSpec(
name=parameter.name,
Expand All @@ -154,24 +126,64 @@ def annotation_to_type_struct(annotation):
type=type_struct,
)
outputs.append(output_spec)
extra_output_names.append(field_name)
arguments.append(OutputPathPlaceholder(field_name))
elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty:
Ark-kun marked this conversation as resolved.
Show resolved Hide resolved
type_struct = annotation_to_type_struct(signature.return_annotation)
output_spec = OutputSpec(
name=single_output_name_const,
type=type_struct,
)
outputs.append(output_spec)
extra_output_names.append(single_output_pythonic_name_const)
arguments.append(OutputPathPlaceholder(single_output_name_const))

func_name=func.__name__
#Component name and description are derived from the function's name and docstribng, but can be overridden by @python_component function decorator
#The decorator can set the _component_human_name and _component_description attributes. getattr is needed to prevent error when these attributes do not exist.
component_name = getattr(func, '_component_human_name', None) or _python_function_name_to_component_name(func.__name__)
description = getattr(func, '_component_description', None) or func.__doc__
if description:
description = description.strip() + '\n' #Interesting: unlike ruamel.yaml, PyYaml cannot handle trailing spaces in the last line (' \n') and switches the style to double-quoted.

component_spec = ComponentSpec(
name=component_name,
description=description,
inputs=inputs,
outputs=outputs,
)
return component_spec


def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None) -> ComponentSpec:
'''Takes a self-contained python function and converts it to component

Args:
func: Required. The function to be converted
base_image: Optional. Docker image to be used as a base image for the python component. Must have python 3.5+ installed. Default is tensorflow/tensorflow:1.11.0-py3
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
extra_code: Optional. Python source code that gets placed before the function code. Can be used as workaround to define types used in function signature.
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
'''
decorator_base_image = getattr(func, '_component_base_image', None)
if decorator_base_image is not None:
if base_image is not _default_base_image and decorator_base_image != base_image:
raise ValueError('base_image ({}) conflicts with the decorator-specified base image metadata ({})'.format(base_image, decorator_base_image))
else:
base_image = decorator_base_image
else:
if base_image is None:
raise ValueError('base_image cannot be None')

component_spec = _extract_component_interface(func)

arguments = []
arguments.extend(InputValuePlaceholder(input.name) for input in component_spec.inputs)
arguments.extend(OutputPathPlaceholder(output.name) for output in component_spec.outputs)

func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture)

extra_output_names = [output.name for output in component_spec.outputs]
extra_output_external_names = [name + '_file' for name in extra_output_names]

from collections import OrderedDict
parameter_to_type_name = OrderedDict((input.name, str(input.type)) for input in component_spec.inputs)

input_args_parsing_code_lines =(
" '{arg_name}': {arg_type}(sys.argv[{arg_idx}]),".format(
arg_name=name_type[0],
Expand Down Expand Up @@ -211,34 +223,22 @@ def annotation_to_type_struct(annotation):
_output_path.parent.mkdir(parents=True, exist_ok=True)
_output_path.write_text(str(_outputs[idx]))
'''.format(
func_name=func_name,
func_name=func.__name__,
func_code=func_code,
extra_code=extra_code,
input_args_parsing_code='\n'.join(input_args_parsing_code_lines),
output_files_parsing_code='\n'.join(output_files_parsing_code_lines),
)

#Removing consecutive blank lines
import re
full_source = re.sub('\n\n\n+', '\n\n', full_source).strip('\n') + '\n'

#Component name and description are derived from the function's name and docstribng, but can be overridden by @python_component function decorator
#The decorator can set the _component_human_name and _component_description attributes. getattr is needed to prevent error when these attributes do not exist.
component_name = getattr(func, '_component_human_name', None) or _python_function_name_to_component_name(func.__name__)
description = getattr(func, '_component_description', None) or func.__doc__
if description:
description = description.strip() + '\n' #Interesting: unlike ruamel.yaml, PyYaml cannot handle trailing spaces in the last line (' \n') and switches the style to double-quoted.

component_spec = ComponentSpec(
name=component_name,
description=description,
inputs=inputs,
outputs=outputs,
implementation=ContainerImplementation(
container=ContainerSpec(
image=base_image,
command=['python3', '-u', '-c', full_source],
args=arguments,
)
component_spec.implementation=ContainerImplementation(
container=ContainerSpec(
image=base_image,
command=['python3', '-u', '-c', full_source],
args=arguments,
)
)

Expand Down