Skip to content

Commit

Permalink
Merge branch 'master' into arsenlosenko/5622-xlsb-format-support-and-…
Browse files Browse the repository at this point in the history
…tests
  • Loading branch information
arsenlosenko authored Aug 12, 2022
2 parents 5eca530 + 3dd9e9f commit ae7401c
Show file tree
Hide file tree
Showing 385 changed files with 25,058 additions and 15,688 deletions.
4 changes: 4 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ JOB_MAIN_CONTAINER_CPU_LIMIT=
JOB_MAIN_CONTAINER_MEMORY_REQUEST=
JOB_MAIN_CONTAINER_MEMORY_LIMIT=

NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_LIMIT=
NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_REQUEST=
NORMALIZATION_JOB_MAIN_CONTAINER_CPU_LIMIT=
NORMALIZATION_JOB_MAIN_CONTAINER_CPU_REQUEST=

### LOGGING/MONITORING/TRACKING ###
TRACKING_STRATEGY=segment
Expand Down
26 changes: 2 additions & 24 deletions .github/workflows/shared-pulls.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,13 @@ on:
types: [opened, labeled, unlabeled, ready_for_review, synchronize, reopened]

jobs:
find_valid_pat:
name: "Find a PAT with room for actions"
timeout-minutes: 10
runs-on: ubuntu-latest
outputs:
pat: ${{ steps.variables.outputs.pat }}
steps:
- name: Checkout Airbyte
uses: actions/checkout@v2
- name: Check PAT rate limits
id: variables
run: |
./tools/bin/find_non_rate_limited_PAT \
${{ secrets.OCTAVIA_PAT }} \
${{ secrets.AIRBYTEIO_PAT }} \
${{ secrets.OSS_BUILD_RUNNER_GITHUB_PAT }} \
${{ secrets.SUPERTOPHER_PAT }} \
${{ secrets.DAVINCHIA_PAT }}
shared-pulls:
name: "Label github issues for tracking"
needs:
- find_valid_pat
runs-on: ubuntu-latest
steps:
- uses: nick-fields/private-action-loader@v3
with:
pal-repo-token: ${{ needs.find_valid_pat.outputs.pat }}
pal-repo-token: "${{ secrets.OCTAVIA_PAT }}"
pal-repo-name: airbytehq/workflow-actions@production
# the following input gets passed to the private action
token: ${{ needs.find_valid_pat.outputs.pat }}
token: "${{ secrets.OCTAVIA_PAT }}"
command: "pull"
3 changes: 3 additions & 0 deletions .vscode/frontend.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
},
{
"path": "../airbyte-webapp-e2e-tests"
},
{
"path": "../docs"
}
],
"extensions": {
Expand Down
51 changes: 49 additions & 2 deletions airbyte-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,49 @@ plugins {

def specFile = "$projectDir/src/main/openapi/config.yaml"

// Deprecated -- can be removed once airbyte-server is converted to use the per-domain endpoints generated by generateApiServer
// Generates JAX-RS server code from the OpenAPI spec (specFile) with the "jaxrs-spec" generator.
// NOTE(review): generateApiServer (defined after this task) declares the same
// "$buildDir/generated/api/server" output directory -- confirm that the overlapping outputs are intended.
task generateApiServerLegacy(type: GenerateTask) {
def serverOutputDir = "$buildDir/generated/api/server"

// Register the spec file as the task input and the generated directory as the task output.
inputs.file specFile
outputs.dir serverOutputDir

generatorName = "jaxrs-spec"
inputSpec = specFile
outputDir = serverOutputDir

// Java packages used for the generated API, invoker and model sources.
apiPackage = "io.airbyte.api.generated"
invokerPackage = "io.airbyte.api.invoker.generated"
modelPackage = "io.airbyte.api.model.generated"

// The schema names below are mapped to Jackson's JsonNode
// (presumably so no dedicated model class is used for them -- verify against the generator docs).
importMappings = [
'OAuthConfiguration' : 'com.fasterxml.jackson.databind.JsonNode',
'SourceDefinitionSpecification' : 'com.fasterxml.jackson.databind.JsonNode',
'SourceConfiguration' : 'com.fasterxml.jackson.databind.JsonNode',
'DestinationDefinitionSpecification': 'com.fasterxml.jackson.databind.JsonNode',
'DestinationConfiguration' : 'com.fasterxml.jackson.databind.JsonNode',
'StreamJsonSchema' : 'com.fasterxml.jackson.databind.JsonNode',
'StateBlob' : 'com.fasterxml.jackson.databind.JsonNode',
'FieldSchema' : 'com.fasterxml.jackson.databind.JsonNode',
]

generateApiDocumentation = false

// Generator-specific options; note that the values are passed as strings.
configOptions = [
dateLibrary : "java8",
generatePom : "false",
interfaceOnly: "true",
/*
JAX-RS generator does not respect nullable properties defined in the OpenApi Spec.
It means that if a field is not nullable but not set it is still returning a null value for this field in the serialized json.
The below Jackson annotation is made to only keep non null values in serialized json.
We are not yet using nullable=true properties in our OpenApi so this is a valid workaround at the moment to circumvent the default JAX-RS behavior described above.
Feel free to read the conversation on https://github.com/airbytehq/airbyte/pull/13370 for more details.
*/
additionalModelTypeAnnotations: "\n@com.fasterxml.jackson.annotation.JsonInclude(com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL)",
]
}

task generateApiServer(type: GenerateTask) {
def serverOutputDir = "$buildDir/generated/api/server"

Expand Down Expand Up @@ -45,10 +88,14 @@ task generateApiServer(type: GenerateTask) {
We are not yet using nullable=true properties in our OpenApi so this is a valid workaround at the moment to circumvent the default JAX-RS behavior described above.
Feel free to read the conversation on https://github.com/airbytehq/airbyte/pull/13370 for more details.
*/
additionalModelTypeAnnotations: "\n@com.fasterxml.jackson.annotation.JsonInclude(com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL)"
additionalModelTypeAnnotations: "\n@com.fasterxml.jackson.annotation.JsonInclude(com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL)",

// Generate separate classes for each endpoint "domain"
useTags: "true"
]
}
compileJava.dependsOn tasks.generateApiServer

compileJava.dependsOn tasks.generateApiServerLegacy, tasks.generateApiServer

task generateApiClient(type: GenerateTask) {
def clientOutputDir = "$buildDir/generated/api/client"
Expand Down
74 changes: 74 additions & 0 deletions airbyte-api/src/main/openapi/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3218,6 +3218,10 @@ components:
$ref: "#/components/schemas/AirbyteCatalog"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
resourceRequirements:
Expand Down Expand Up @@ -3257,6 +3261,10 @@ components:
$ref: "#/components/schemas/AirbyteCatalog"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
resourceRequirements:
Expand Down Expand Up @@ -3298,6 +3306,10 @@ components:
$ref: "#/components/schemas/AirbyteCatalog"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
resourceRequirements:
Expand Down Expand Up @@ -3335,6 +3347,10 @@ components:
$ref: "#/components/schemas/AirbyteCatalog"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
resourceRequirements:
Expand Down Expand Up @@ -3386,6 +3402,10 @@ components:
$ref: "#/components/schemas/AirbyteCatalog"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
resourceRequirements:
Expand Down Expand Up @@ -3416,6 +3436,10 @@ components:
$ref: "#/components/schemas/DestinationId"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
source:
Expand Down Expand Up @@ -3445,6 +3469,10 @@ components:
$ref: "#/components/schemas/DestinationId"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
source:
Expand All @@ -3467,6 +3495,8 @@ components:
- active
- inactive
- deprecated
# TODO(https://github.com/airbytehq/airbyte/issues/11432): remove.
# Prefer the ConnectionScheduleType and ConnectionScheduleData properties.
ConnectionSchedule:
description: if null, then no schedule is set.
type: object
Expand All @@ -3485,6 +3515,46 @@ components:
- days
- weeks
- months
ConnectionScheduleType:
description: determine how the schedule data should be interpreted
type: string
enum:
- manual
- basic
- cron
ConnectionScheduleData:
description: schedule for when the connection should run, per the schedule type
type: object
properties:
# This should be populated when schedule type is basic.
basicSchedule:
type: object
required:
- timeUnit
- units
properties:
timeUnit:
type: string
enum:
- minutes
- hours
- days
- weeks
- months
units:
type: integer
format: int64
# This should be populated when schedule type is cron.
cron:
type: object
required:
- cronExpression
- cronTimeZone
properties:
cronExpression:
type: string
cronTimeZone:
type: string
NamespaceDefinitionType:
type: string
description: Method used for computing final namespace in destination
Expand Down Expand Up @@ -4564,6 +4634,10 @@ components:
$ref: "#/components/schemas/AirbyteCatalog"
schedule:
$ref: "#/components/schemas/ConnectionSchedule"
scheduleType:
$ref: "#/components/schemas/ConnectionScheduleType"
scheduleData:
$ref: "#/components/schemas/ConnectionScheduleData"
status:
$ref: "#/components/schemas/ConnectionStatus"
operationIds:
Expand Down
8 changes: 7 additions & 1 deletion airbyte-cdk/python/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Changelog

## 0.1.72
## 0.1.74
- Replace JelloRecordExtractor with DpathRecordExtractor

## 0.1.73
- Bugfix: Fix bug in DatetimeStreamSlicer's parsing method

## 0.1.72
- Bugfix: Fix bug in DatetimeStreamSlicer's format method

## 0.1.71
- Refactor declarative package to dataclasses
- Bugfix: Requester header always converted to string
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import datetime
from typing import Union


class DatetimeParser:
    """
    Parses and formats datetime objects according to a specified format.

    This class mainly acts as a wrapper to properly handle timestamps expressed through the "%s" directive
    (seconds since the Epoch). "%s" is a platform C-library extension rather than one of the portable format codes:
    strptime does not accept it, and strftime("%s") ignores the datetime's time zone and assumes the system's one.
    Instead of using the directive directly, we use datetime.fromtimestamp and dt.timestamp().
    """

    def parse(self, date: Union[str, int], format: str, timezone) -> datetime.datetime:
        """
        Parse `date` according to `format`.

        :param date: value to parse; a string matching `format`, or the number of seconds since the Epoch when format is "%s"
        :param format: strptime-style format string, or exactly "%s" for an Epoch timestamp
        :param timezone: tzinfo of the result for "%s"; for other formats it is attached only when the parsed
            value carries no UTC offset of its own
        :return: the parsed datetime
        :raises ValueError: if `date` does not match `format` (or is not an integer when format is "%s")
        """
        # "%s" is a valid (but unreliable) directive for formatting, but not for parsing
        # It is defined as
        # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html
        #
        # The recommended way to parse a date from its timestamp representation is to use datetime.fromtimestamp
        # See https://stackoverflow.com/a/4974930
        if format == "%s":
            return datetime.datetime.fromtimestamp(int(date), tz=timezone)

        parsed = datetime.datetime.strptime(str(date), format)
        if parsed.utcoffset() is None:
            # Naive result (no %z in the format): interpret the wall-clock value in the requested timezone
            return parsed.replace(tzinfo=timezone)
        # The input carried its own UTC offset; overwriting tzinfo here would silently shift the instant
        return parsed

    def format(self, dt: datetime.datetime, format: str) -> str:
        """
        Format `dt` according to `format`.

        :param dt: datetime to format
        :param format: strftime-style format string, or exactly "%s" for seconds since the Epoch
        :return: the formatted string; for "%s", the integer part of dt.timestamp()
        """
        # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on
        # It's safer to use the timestamp() method than the %s directive
        # See https://stackoverflow.com/a/4974930
        if format == "%s":
            return str(int(dt.timestamp()))
        return dt.strftime(format)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dataclasses import InitVar, dataclass, field
from typing import Any, Mapping, Union

from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
from dataclasses_jsonschema import JsonSchemaMixin

Expand Down Expand Up @@ -40,6 +41,7 @@ class MinMaxDatetime(JsonSchemaMixin):
def __post_init__(self, options: Mapping[str, Any]):
self.datetime = InterpolatedString.create(self.datetime, options=options or {})
self.timezone = dt.timezone.utc
self._parser = DatetimeParser()
self.min_datetime = InterpolatedString.create(self.min_datetime, options=options) if self.min_datetime else None
self.max_datetime = InterpolatedString.create(self.max_datetime, options=options) if self.max_datetime else None

Expand All @@ -57,17 +59,13 @@ def get_datetime(self, config, **additional_options) -> dt.datetime:
if not datetime_format:
datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"

time = dt.datetime.strptime(str(self.datetime.eval(config, **additional_options)), datetime_format).replace(tzinfo=self._timezone)
time = self._parser.parse(str(self.datetime.eval(config, **additional_options)), datetime_format, self.timezone)

if self.min_datetime:
min_time = dt.datetime.strptime(str(self.min_datetime.eval(config, **additional_options)), datetime_format).replace(
tzinfo=self._timezone
)
min_time = self._parser.parse(str(self.min_datetime.eval(config, **additional_options)), datetime_format, self.timezone)
time = max(time, min_time)
if self.max_datetime:
max_time = dt.datetime.strptime(str(self.max_datetime.eval(config, **additional_options)), datetime_format).replace(
tzinfo=self._timezone
)
max_time = self._parser.parse(str(self.max_datetime.eval(config, **additional_options)), datetime_format, self.timezone)
time = min(time, max_time)
return time

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class DeclarativeStream(Stream, JsonSchemaMixin):
DeclarativeStream is a Stream that delegates most of its logic to its schema_load and retriever
Attributes:
stream_name (str): stream name
stream_primary_key (Optional[Union[str, List[str], List[List[str]]]]): the primary key of the stream
name (str): stream name
primary_key (Optional[Union[str, List[str], List[List[str]]]]): the primary key of the stream
schema_loader (SchemaLoader): The schema loader
retriever (Retriever): The retriever
config (Config): The user-provided configuration as specified by the source's spec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ class ReadException(Exception):
"""
Raise when there is an error reading data from an API Source
"""


class InvalidConnectorDefinitionException(Exception):
    """Raised when the connector definition is invalid."""
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector

__all__ = ["HttpSelector", "JelloExtractor", "RecordFilter", "RecordSelector"]
__all__ = ["HttpSelector", "DpathExtractor", "RecordFilter", "RecordSelector"]
Loading

0 comments on commit ae7401c

Please sign in to comment.