Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add secrets support for tap and target config, via Property(..., secret=True) #1096

Merged
merged 22 commits into from
Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9f56e96
chore: initial refactor for readability
aaronsteers Oct 20, 2022
b85760a
feat: add Meltano rendering logic in private helper module
aaronsteers Oct 20, 2022
8d95b22
feat: add `secret=True` support in JSON Schema type helpers
aaronsteers Oct 20, 2022
2820b91
change: update examples to use 'secret=True' for protected settings
aaronsteers Oct 20, 2022
e74be3f
chore: flake8 fix
aaronsteers Oct 20, 2022
e0239b5
add unit tests for type helpers
aaronsteers Oct 20, 2022
17905b1
fix missing secret flag on unit test
aaronsteers Oct 20, 2022
9d6a364
chore: get tests passing
aaronsteers Oct 20, 2022
8ad7727
chore: add test for description
aaronsteers Oct 20, 2022
2792aa1
chore: remove commented code
aaronsteers Oct 20, 2022
b86cfc4
chore: remove files related to #1094
aaronsteers Oct 20, 2022
2b83266
chore: revert --about updates
aaronsteers Oct 20, 2022
f8c734a
Merge branch 'main' into 77-feat-secrets-support-in-config-and-streams
aaronsteers Oct 20, 2022
0160f01
use constants for annotation keys
aaronsteers Oct 21, 2022
d9647bd
chore: bump validator to Draft7
aaronsteers Oct 21, 2022
40041ed
chore: add testing for is_secret_type
aaronsteers Oct 21, 2022
839b165
chore: add tests
aaronsteers Oct 21, 2022
a82851e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 21, 2022
c2b64d7
chore: more tests
aaronsteers Oct 21, 2022
f0b680b
Merge branch '77-feat-secrets-support-in-config-and-streams' of https…
aaronsteers Oct 21, 2022
372fc34
docs: add info to FAQ
aaronsteers Oct 21, 2022
8eb0026
chore: add test for integer type
aaronsteers Oct 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class Tap{{ cookiecutter.source_name }}({{ 'SQL' if cookiecutter.stream_type ==
"auth_token",
th.StringType,
required=True,
secret=True, # Flag config as protected.
description="The token to authenticate against the API service"
),
th.Property(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}):
th.Property(
"sqlalchemy_url",
th.StringType,
secret=True, # Flag config as protected.
description="SQLAlchemy connection string",
),
{%- else %}
Expand All @@ -34,6 +35,12 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}):
th.StringType,
description="The scheme with which output files will be named"
),
th.Property(
"auth_token",
th.StringType,
secret=True, # Flag config as protected.
description="The path to the target output file"
),
{%- endif %}
).to_dict()

Expand Down
2 changes: 1 addition & 1 deletion samples/sample_tap_gitlab/gitlab_tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class SampleTapGitlab(Tap):

name: str = "sample-tap-gitlab"
config_jsonschema = PropertiesList(
Property("auth_token", StringType, required=True),
Property("auth_token", StringType, required=True, secret=True),
Property("project_ids", ArrayType(StringType), required=True),
Property("group_ids", ArrayType(StringType), required=True),
Property("start_date", DateTimeType, required=True),
Expand Down
2 changes: 1 addition & 1 deletion samples/sample_tap_google_analytics/ga_tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class SampleTapGoogleAnalytics(Tap):
config_jsonschema = PropertiesList(
Property("view_id", StringType(), required=True),
Property("client_email", StringType(), required=True),
Property("private_key", StringType(), required=True),
Property("private_key", StringType(), required=True, secret=True),
).to_dict()

def discover_streams(self) -> List[SampleGoogleAnalyticsStream]:
Expand Down
98 changes: 98 additions & 0 deletions singer_sdk/helpers/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

_MAX_TIMESTAMP = "9999-12-31 23:59:59.999999"
_MAX_TIME = "23:59:59.999999"
# Annotation key marking a property as secret; checked by `is_secret_type()`.
JSONSCHEMA_ANNOTATION_SECRET = "secret"
# JSON Schema `writeOnly` annotation key; also treated as a secret marker by
# `is_secret_type()`.
JSONSCHEMA_ANNOTATION_WRITEONLY = "writeOnly"


class DatetimeErrorTreatmentEnum(Enum):
Expand Down Expand Up @@ -54,6 +56,41 @@ def append_type(type_dict: dict, new_type: str) -> dict:
)


def is_secret_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition appears to be a secret.

    A type is considered secret when either the `writeOnly` or the `secret`
    annotation is truthy on the type itself or on any of its subproperties
    (checked recursively through `properties`).

    Args:
        type_dict: The JSON Schema type to check.

    Raises:
        ValueError: If type_dict is None or empty.

    Returns:
        True if we detect any sensitive property nodes.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    # Either annotation on this node is enough to flag it.
    for annotation_key in (
        JSONSCHEMA_ANNOTATION_WRITEONLY,
        JSONSCHEMA_ANNOTATION_SECRET,
    ):
        if type_dict.get(annotation_key):
            return True

    if "properties" not in type_dict:
        return False

    # Walk the subproperties and stop at the first secret child.
    for child_type_dict in type_dict["properties"].values():
        if is_secret_type(child_type_dict):
            return True
    return False


def is_object_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is an object or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand Down Expand Up @@ -86,6 +123,40 @@ def is_datetime_type(type_dict: dict) -> bool:
)


def is_date_or_datetime_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition is a 'date'/'date-time' type.

    Also returns True if any sub-schema nested within an 'anyOf' list is a
    'date'/'date-time' type.

    Args:
        type_dict: The JSON Schema definition.

    Raises:
        ValueError: If type is empty or null, or if the definition contains
            neither a 'type' nor an 'anyOf' key.

    Returns:
        True if date or date-time, else False.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    if "anyOf" in type_dict:
        # Give each member its own name so the `type_dict` argument is not
        # rebound while iterating over its own contents. `any()` stops at the
        # first matching member.
        return any(
            is_date_or_datetime_type(member) for member in type_dict["anyOf"]
        )

    if "type" in type_dict:
        # Only the `format` annotation distinguishes date-like strings.
        return type_dict.get("format") in {"date", "date-time"}

    raise ValueError(
        f"Could not detect type of replication key using schema '{type_dict}'"
    )


def get_datelike_property_type(property_schema: Dict) -> Optional[str]:
"""Return one of 'date-time', 'time', or 'date' if property is date-like.

Expand Down Expand Up @@ -152,6 +223,23 @@ def is_string_array_type(type_dict: dict) -> bool:
return "array" in type_dict["type"] and bool(is_string_type(type_dict["items"]))


def is_array_type(type_dict: dict) -> bool:
    """Return True if JSON Schema type definition is an array type.

    Also returns True if any sub-schema within an 'anyOf' list is an array
    type. The item type of the array is not inspected.

    Args:
        type_dict: The JSON Schema definition.

    Raises:
        ValueError: If type_dict is empty or null, or if it contains neither
            a 'type' nor an 'anyOf' key.

    Returns:
        True if the definition is an array type, else False.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    if "anyOf" in type_dict:
        # Generator form: no throwaway list, and stops at the first match.
        return any(is_array_type(member) for member in type_dict["anyOf"])

    if "type" not in type_dict:
        raise ValueError(f"Could not detect type from schema '{type_dict}'")

    # `type` may be a single string or a list of type names.
    return "array" in type_dict["type"]


def is_boolean_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is a boolean or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -162,6 +250,16 @@ def is_boolean_type(property_schema: dict) -> Optional[bool]:
return False


def is_integer_type(property_schema: dict) -> Optional[bool]:
    """Return true if the JSON Schema type is an integer or None if detection fails.

    Args:
        property_schema: The JSON Schema definition of the property.

    Returns:
        True if the type (or any member of 'anyOf') is 'integer', False if a
        type was detected but is not an integer, or None if the schema has
        neither a 'type' nor an 'anyOf' key.
    """
    if "anyOf" not in property_schema and "type" not in property_schema:
        return None  # Could not detect data type

    candidates = property_schema.get("anyOf", [property_schema.get("type")])
    for candidate in candidates:
        if isinstance(candidate, dict):
            # `anyOf` members are sub-schemas, not type names. Recurse so that
            # e.g. {"anyOf": [{"type": "integer"}]} is detected; a plain
            # `in` test would only look at the sub-schema's keys.
            if is_integer_type(candidate):
                return True
        elif candidate is not None and "integer" in candidate:
            # Covers both a single type name and a list of type names.
            return True
    return False


def is_string_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is a boolean or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand Down
4 changes: 2 additions & 2 deletions singer_sdk/plugin_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)

import click
from jsonschema import Draft4Validator, SchemaError, ValidationError
from jsonschema import Draft7Validator, SchemaError, ValidationError

from singer_sdk import metrics
from singer_sdk.configuration._dict_config import parse_environment_config
Expand All @@ -42,7 +42,7 @@
SDK_PACKAGE_NAME = "singer_sdk"


JSONSchemaValidator = extend_validator_with_defaults(Draft4Validator)
JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator)


class PluginBase(metaclass=abc.ABCMeta):
Expand Down
6 changes: 3 additions & 3 deletions singer_sdk/sinks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from typing import IO, Any, Mapping, Sequence

from dateutil import parser
from jsonschema import Draft4Validator, FormatChecker
from jsonschema import Draft7Validator, FormatChecker

from singer_sdk.helpers._batch import (
BaseBatchFileEncoding,
Expand All @@ -29,7 +29,7 @@
)
from singer_sdk.plugin_base import PluginBase

JSONSchemaValidator = Draft4Validator
JSONSchemaValidator = Draft7Validator


class Sink(metaclass=abc.ABCMeta):
Expand Down Expand Up @@ -80,7 +80,7 @@ def __init__(
self._batch_records_read: int = 0
self._batch_dupe_records_merged: int = 0

self._validator = Draft4Validator(schema, format_checker=FormatChecker())
self._validator = Draft7Validator(schema, format_checker=FormatChecker())

def _get_context(self, record: dict) -> dict:
"""Return an empty dictionary by default.
Expand Down
23 changes: 22 additions & 1 deletion singer_sdk/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@
from jsonschema import validators

from singer_sdk.helpers._classproperty import classproperty
from singer_sdk.helpers._typing import append_type, get_datelike_property_type
from singer_sdk.helpers._typing import (
JSONSCHEMA_ANNOTATION_SECRET,
JSONSCHEMA_ANNOTATION_WRITEONLY,
append_type,
get_datelike_property_type,
)

if sys.version_info >= (3, 10):
from typing import TypeAlias
Expand Down Expand Up @@ -352,21 +357,30 @@ def __init__(
required: bool = False,
default: _JsonValue = None,
description: str = None,
secret: bool = False,
) -> None:
"""Initialize Property object.

Note: Properties containing secrets should be specified with `secret=True`.
Doing so will add the annotation `writeOnly=True`, in accordance with JSON
Schema Draft 7 and later, and `secret=True` as an additional hint to readers.

More info: https://json-schema.org/draft-07/json-schema-release-notes.html
aaronsteers marked this conversation as resolved.
Show resolved Hide resolved

Args:
name: Property name.
wrapped: JSON Schema type of the property.
required: Whether this is a required property.
default: Default value in the JSON Schema.
description: Long-text property description.
secret: True if this is a credential or other secret.
"""
self.name = name
self.wrapped = wrapped
self.optional = not required
self.default = default
self.description = description
self.secret = secret

@property
def type_dict(self) -> dict: # type: ignore # OK: @classproperty vs @property
Expand Down Expand Up @@ -402,6 +416,13 @@ def to_dict(self) -> dict:
type_dict.update({"default": self.default})
if self.description:
type_dict.update({"description": self.description})
if self.secret:
type_dict.update(
{
JSONSCHEMA_ANNOTATION_SECRET: True,
JSONSCHEMA_ANNOTATION_WRITEONLY: True,
}
)
return {self.name: type_dict}


Expand Down
51 changes: 50 additions & 1 deletion tests/core/test_jsonschema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

import pytest

from singer_sdk.helpers._typing import (
JSONSCHEMA_ANNOTATION_SECRET,
JSONSCHEMA_ANNOTATION_WRITEONLY,
)
from singer_sdk.streams.core import Stream
from singer_sdk.tap_base import Tap
from singer_sdk.typing import (
Expand Down Expand Up @@ -43,7 +47,7 @@ class ConfigTestTap(Tap):
config_jsonschema = PropertiesList(
Property("host", StringType, required=True),
Property("username", StringType, required=True),
Property("password", StringType, required=True),
Property("password", StringType, required=True, secret=True),
Property("batch_size", IntegerType, default=-1),
).to_dict()

Expand Down Expand Up @@ -253,6 +257,51 @@ def test_inbuilt_type(json_type: JSONTypeHelper, expected_json_schema: dict):
assert json_type.type_dict == expected_json_schema


@pytest.mark.parametrize(
    "property_obj,expected_jsonschema",
    [
        # Required property: the type list carries no "null" entry.
        (
            Property("my_prop1", StringType, required=True),
            {"my_prop1": {"type": ["string"]}},
        ),
        # Optional property: "null" is appended to the type list.
        (
            Property("my_prop2", StringType, required=False),
            {"my_prop2": {"type": ["string", "null"]}},
        ),
        # Secret property: both the `secret` and `writeOnly` annotations are set.
        (
            Property("my_prop3", StringType, secret=True),
            {
                "my_prop3": {
                    "type": ["string", "null"],
                    JSONSCHEMA_ANNOTATION_SECRET: True,
                    JSONSCHEMA_ANNOTATION_WRITEONLY: True,
                }
            },
        ),
        # Description is passed through to the schema.
        (
            Property("my_prop4", StringType, description="This is a property."),
            {
                "my_prop4": {
                    "description": "This is a property.",
                    "type": ["string", "null"],
                }
            },
        ),
        # Default value is passed through to the schema.
        (
            Property("my_prop5", StringType, default="some_val"),
            {
                "my_prop5": {
                    "default": "some_val",
                    "type": ["string", "null"],
                }
            },
        ),
    ],
)
def test_property_creation(property_obj: Property, expected_jsonschema: dict) -> None:
    """Check that `Property.to_dict()` yields the expected JSON Schema fragment."""
    assert property_obj.to_dict() == expected_jsonschema


def test_wrapped_type_dict():
with pytest.raises(
ValueError,
Expand Down