Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix pyfunc bugs for release #188

Merged
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
29c6a18
Refactor pyfunc ensemble method to use input as arg name
deadlycoconuts Mar 24, 2022
e86f394
Hide the search bar for ensemblers temporarily
deadlycoconuts Mar 24, 2022
d59f759
Refactor predictions in pyfunc interface
deadlycoconuts Mar 24, 2022
4655216
Refactor predictions in pyfunc ensembler sample
deadlycoconuts Mar 25, 2022
d3244b4
Fix renaming of features to input
deadlycoconuts Mar 25, 2022
ccabb46
Refactor pyfunc ensembler service engine
deadlycoconuts Mar 25, 2022
b02edc9
Update SDK version number
deadlycoconuts Mar 25, 2022
4293ec4
Fix non-deterministic experiment behaviour for e2e tests
deadlycoconuts Mar 25, 2022
f41ce6b
Refactor name change from features to input in batch ensembling engine
deadlycoconuts Mar 25, 2022
1ec08af
Add ensembler name search to API
deadlycoconuts Mar 25, 2022
6b4bc48
Update openapi generated ensembler class
deadlycoconuts Mar 25, 2022
8720668
Add description to search query for list ensemblers endpoint
deadlycoconuts Mar 25, 2022
471f6d5
Revert version number to base number
deadlycoconuts Mar 25, 2022
a5063ec
Replace ensembler/job name search to be case-insensitive
deadlycoconuts Mar 25, 2022
c3ac01f
Remove random tag created
deadlycoconuts Mar 25, 2022
353954a
Revert infra values to original
deadlycoconuts Mar 28, 2022
df35fd0
Drop python versions for ensembler engines
deadlycoconuts Mar 28, 2022
4693f44
Fix jinja2 version for batch ensembler engine
deadlycoconuts Mar 28, 2022
c47112b
Simplify API description for jobs name query string
deadlycoconuts Mar 28, 2022
481c693
Drop ensembler engines setup files Python version to 3.7
deadlycoconuts Mar 28, 2022
5c053b2
- fix treatments assigner
romanwozniak Mar 28, 2022
808818c
- sort exp treatments by the traffic %
romanwozniak Mar 28, 2022
99fdb28
Fix ensembler engines and SDK to all use Python 3.7.*
deadlycoconuts Mar 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion api/api/openapi.bundle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ paths:
schema:
default: 10
type: integer
- description: Searches ensembler name for a partial match of the search text
in: query
name: search
schema:
type: string
- in: query
name: type
schema:
Expand Down Expand Up @@ -172,7 +177,9 @@ paths:
schema:
format: int32
type: integer
- in: query
- description: Searches ensembler name used for the ensembling job for a partial
deadlycoconuts marked this conversation as resolved.
Show resolved Hide resolved
match of the search text
in: query
name: search
schema:
type: string
Expand Down
5 changes: 5 additions & 0 deletions api/api/specs/ensemblers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ paths:
schema:
type: integer
default: 10
- in: query
name: search
description: Searches ensembler name for a partial match of the search text
schema:
type: string
deadlycoconuts marked this conversation as resolved.
Show resolved Hide resolved
- in: query
name: type
schema:
Expand Down
1 change: 1 addition & 0 deletions api/api/specs/jobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ paths:
<<: *id
- in: query
name: search
description: Searches ensembler name used for the ensembling job for a partial match of the search text
schema:
type: string
- in: query
Expand Down
7 changes: 7 additions & 0 deletions api/turing/service/ensembler_service.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package service

import (
"fmt"

"github.com/gojek/turing/api/turing/models"
"github.com/jinzhu/gorm"
)
Expand All @@ -23,6 +25,7 @@ type EnsemblersFindByIDOptions struct {
type EnsemblersListOptions struct {
PaginationOptions
ProjectID *models.ID `schema:"project_id" validate:"required"`
Search *string `schema:"search"`
EnsemblerType *models.EnsemblerType `schema:"type" validate:"omitempty,oneof=pyfunc docker"`
}

Expand Down Expand Up @@ -68,6 +71,10 @@ func (service *ensemblersService) List(options EnsemblersListOptions) (*Paginate
query = query.Where("project_id = ?", options.ProjectID)
}

if options.Search != nil && len(*options.Search) > 0 {
query = query.Where("name ilike ?", fmt.Sprintf("%%%s%%", *options.Search))
}

if options.EnsemblerType != nil {
query = query.Where("type = ?", options.EnsemblerType)
}
Expand Down
2 changes: 1 addition & 1 deletion api/turing/service/ensembling_job_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func (s *ensemblingJobService) List(options EnsemblingJobListOptions) (*Paginate
}

if options.Search != nil && len(*options.Search) > 0 {
query = query.Where("name like ?", fmt.Sprintf("%%%s%%", *options.Search))
query = query.Where("name ilike ?", fmt.Sprintf("%%%s%%", *options.Search))
}

if options.Statuses != nil {
Expand Down
2 changes: 1 addition & 1 deletion engines/pyfunc-ensembler-job/environment.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: pyfunc-ensembler-job
dependencies:
- python=3.8
- python=3.7
deadlycoconuts marked this conversation as resolved.
Show resolved Hide resolved
- pip=21.0.1
- pip:
- -r file:requirements.txt
Expand Down
4 changes: 2 additions & 2 deletions engines/pyfunc-ensembler-job/tests/ensembler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def initialize(self, artifacts: dict):

def ensemble(
self,
features: pandas.Series,
input: pandas.Series,
predictions: pandas.Series,
treatment_config: Optional[dict]):
return predictions[f'model_{self.result_type.name.lower()}']
Expand Down Expand Up @@ -93,7 +93,7 @@ def initialize(self, artifacts: dict):

def ensemble(
self,
features: pandas.Series,
input: pandas.Series,
predictions: pandas.Series,
treatment_config: Optional[dict]) -> Any:
return predictions[['model_a', 'model_b']].to_numpy() * 2
Expand Down
2 changes: 1 addition & 1 deletion engines/pyfunc-ensembler-service/environment.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: pyfunc-ensembler-service
dependencies:
- python=3.8
- python=3.7
- pip=21.0.1
- pip:
- -r file:requirements.txt
Expand Down
18 changes: 3 additions & 15 deletions engines/pyfunc-ensembler-service/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,13 @@ def initialize(self, artifacts: dict):

def ensemble(
self,
features: Dict,
input: Dict,
predictions: Dict,
treatment_config: Dict) -> Any:
route_name_to_id = TestEnsembler.get_route_name_to_id_mapping(predictions)
if treatment_config['configuration']['name'] == "choose_the_control":
return predictions[route_name_to_id['control']]['data']['predictions']
return predictions['control']['data']['predictions']
else:
return predictions[0]['data']['predictions']

@staticmethod
def get_route_name_to_id_mapping(predictions):
"""
Helper function to look through the predictions returned from the various routes and to map their names to
their id numbers (the order in which they are found in the payload).
"""
route_name_to_id = {}
for i, pred in enumerate(predictions):
route_name_to_id[pred['route']] = i
return route_name_to_id
deadlycoconuts marked this conversation as resolved.
Show resolved Hide resolved
return [0, 0]


@pytest.fixture
Expand Down
4 changes: 2 additions & 2 deletions sdk/samples/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ def initialize(self, artifacts: dict):

def ensemble(
self,
features: pandas.Series,
input: pandas.Series,
predictions: pandas.Series,
treatment_config: Optional[dict]) -> Any:
customer_id = features["customer_id"]
customer_id = input["customer_id"]
if (customer_id % 2) == 0:
return predictions['model_even']
else:
Expand Down
25 changes: 10 additions & 15 deletions sdk/samples/router/create_router_with_pyfunc_ensembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,43 +17,38 @@
class SampleEnsembler(turing.ensembler.PyFunc):
"""
A simple ensembler, that returns the value corresponding to the version that has been specified in the
`features` in each request. This value if obtained from the route responses found in the `predictions` in each
`input` in each request. This value is obtained from the route responses found in the `predictions` in each
request.

If no version is specified in `features`, return the sum of all the values of all the route responses in
If no version is specified in `input`, return the sum of all the values of all the route responses in
`predictions` instead.

e.g. The values in the route responses (`predictions`) corresponding to the versions, `a`, `b` and `c` are 1, 2
and 3 respectively.

For a given request, if the version specified in `features` is "a", the ensembler would return the value 1.
For a given request, if the version specified in `input` is "a", the ensembler would return the value 1.

If no version is specified in `features`, the ensembler would return the value 6 (1 + 2 + 3).
If no version is specified in `input`, the ensembler would return the value 6 (1 + 2 + 3).
"""
# `initialize` is essentially a method that gets called when an object of your implemented class gets instantiated
def initialize(self, artifacts: dict):
pass

# Each time a Turing Router sends a request to a pyfunc ensembler, ensemble will be called, with the request payload
# being passed as the `features` argument, and the route responses as the `predictions` argument.
# being passed as the `input` argument, and the route responses as the `predictions` argument.
#
# If an experiment has been set up, the experiment returned would also be passed as the `treatment_config` argument.
#
# The return value of `ensemble` will then be returned as a `json` payload to the Turing router.
def ensemble(
self,
features: dict,
predictions: List[dict],
input: dict,
predictions: dict,
treatment_config: dict) -> Any:
# Get a mapping between route names and their corresponding responses
routes_to_response = dict()
for prediction in predictions:
routes_to_response[prediction["route"]] = prediction

if "version" in features:
return routes_to_response[features["version"]]["data"]["value"]
if "version" in input:
return predictions[input["version"]]["data"]["value"]
else:
return sum(response["data"]["value"] for response in routes_to_response.values())
return sum(prediction["data"]["value"] for prediction in predictions.values())


def main(turing_api: str, project: str):
Expand Down
6 changes: 3 additions & 3 deletions sdk/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ def initialize(self, artifacts: dict):

def ensemble(
self,
features: pandas.Series,
input: pandas.Series,
predictions: pandas.Series,
treatment_config: Optional[dict]
) -> Any:
if features["treatment"] in predictions:
return predictions[features["treatment"]]
if input["treatment"] in predictions:
return predictions[input["treatment"]]
else:
return self._default
27 changes: 17 additions & 10 deletions sdk/turing/ensembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ class EnsemblerBase(abc.ABC):
@abc.abstractmethod
def ensemble(
self,
features: pandas.Series,
predictions: pandas.Series,
treatment_config: Optional[pandas.Series]) -> Any:
input: Union[pandas.Series, Dict[str, Any]],
predictions: Union[pandas.Series, Dict[str, Any]],
treatment_config: Optional[Union[pandas.Series, Dict[str, Any]]]) -> Any:
"""
Ensembler should have an ensemble method, that implements the logic on how to
ensemble final prediction results from individual model predictions and a treatment
configuration.

:param features: pandas.Series, containing a single row with input features
:param predictions: pandas.Series, containing a single row with all models predictions
`predictions['model-a']` will contain prediction results from the model-a
:param treatment_config: Optional[pandas.Series], representing the configuration of a
:param input: Union[pandas.Series, Dict[str, Any]], containing a single row or dict with input features
:param predictions: Union[pandas.Series, Dict[str, Any]], containing a single row or dict with all
models' predictions: `predictions['model-a']` will contain prediction results from the model-a
:param treatment_config: Optional[Union[pandas.Series, Dict[str, Any]]], representing the configuration of a
treatment, that should be applied to a given record/payload. If the experiment
engine is not configured, then `treatment_config` will be `None`

Expand Down Expand Up @@ -73,7 +73,7 @@ def _ensemble_batch(self, model_input: pandas.DataFrame) -> Union[numpy.ndarray,
.rename(columns=prediction_columns) \
.apply(lambda row:
self.ensemble(
features=row.drop(prediction_columns.values()),
input=row.drop(prediction_columns.values()),
predictions=row[prediction_columns.values()],
treatment_config=None
), axis=1, result_type='expand')
Expand All @@ -83,9 +83,16 @@ def _ensemble_request(self, model_input: Dict[str, Any]) -> Any:
Helper function to ensemble single requests; works on dictionary input in a single request made to the pyfunc
ensembler service (run by the pyfunc ensembler service engine)
"""
# Get a mapping between route names and their corresponding responses
routes_to_response = dict()
for prediction in model_input['response']['route_responses']:
routes_to_response[prediction["route"]] = prediction.copy()
# Deletes route from the dictionary as it is a duplicate of the key
del routes_to_response[prediction["route"]]["route"]

return self.ensemble(
features=model_input['request'],
predictions=model_input['response']['route_responses'],
input=model_input['request'],
predictions=routes_to_response,
treatment_config=model_input['response']['experiment']
)

Expand Down
6 changes: 6 additions & 0 deletions sdk/turing/generated/api/ensembler_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ def __list_ensemblers(
Keyword Args:
page (int): [optional] if omitted the server will use the default value of 1
page_size (int): [optional] if omitted the server will use the default value of 10
search (str): Searches ensembler name for a partial match of the search text. [optional]
type (EnsemblerType): [optional]
_return_http_data_only (bool): response data without head status
code and headers. Default is True.
Expand Down Expand Up @@ -373,6 +374,7 @@ def __list_ensemblers(
'project_id',
'page',
'page_size',
'search',
'type',
],
'required': [
Expand All @@ -397,19 +399,23 @@ def __list_ensemblers(
(int,),
'page_size':
(int,),
'search':
(str,),
'type':
(EnsemblerType,),
},
'attribute_map': {
'project_id': 'project_id',
'page': 'page',
'page_size': 'page_size',
'search': 'search',
'type': 'type',
},
'location_map': {
'project_id': 'path',
'page': 'query',
'page_size': 'query',
'search': 'query',
'type': 'query',
},
'collection_format_map': {
Expand Down
2 changes: 1 addition & 1 deletion sdk/turing/generated/api/ensembling_job_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def __list_ensembling_jobs(
page (int): [optional] if omitted the server will use the default value of 1
page_size (int): [optional] if omitted the server will use the default value of 10
ensembler_id (int): [optional]
search (str): [optional]
search (str): Searches ensembler name used for the ensembling job for a partial match of the search text. [optional]
status ([EnsemblerJobStatus]): [optional]
_return_http_data_only (bool): response data without head status
code and headers. Default is True.
Expand Down
3 changes: 2 additions & 1 deletion sdk/turing/version.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
VERSION = "0.1.0"
# Do not update this field manually; this value gets updated automatically during build time
VERSION = "0.0.0"