Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Source File: fix check method #18481

Merged
merged 5 commits into from
Oct 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@
- name: File
sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerRepository: airbyte/source-file
dockerImageTag: 0.2.26
dockerImageTag: 0.2.27
documentationUrl: https://docs.airbyte.com/integrations/sources/file
icon: file.svg
sourceType: file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3123,7 +3123,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-file:0.2.26"
- dockerImage: "airbyte/source-file:0.2.27"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/file"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ COPY source_file ./source_file
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.26
LABEL io.airbyte.version=0.2.27
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pytest
from airbyte_cdk import AirbyteLogger
from source_file import SourceFile
from source_file.client import Client
from source_file.client import Client, ConfigurationError

SAMPLE_DIRECTORY = Path(__file__).resolve().parent.joinpath("sample_files/formats")

Expand Down Expand Up @@ -42,6 +42,28 @@ def test_local_file_read(file_format, extension, expected_columns, expected_rows
check_read(configs, expected_columns, expected_rows)


@pytest.mark.parametrize(
    "file_format, extension, wrong_format, filename",
    [
        ("excel", "xls", "csv", "demo"),
        ("excel", "xlsx", "csv", "demo"),
        ("csv", "csv", "excel", "demo"),
        ("jsonl", "jsonl", "excel", "jsonl_nested"),
        ("feather", "feather", "csv", "demo"),
        ("parquet", "parquet", "feather", "demo"),
        ("yaml", "yaml", "json", "demo"),
    ],
)
def test_raises_file_wrong_format(file_format, extension, wrong_format, filename):
    """Reading a sample file with a mismatched ``format`` setting must raise.

    Each case points the client at ``<SAMPLE_DIRECTORY>/<file_format>/<filename>.<extension>``
    through the local storage provider, but configures it with ``wrong_format``
    instead of the file's real format.

    The test passes when consuming ``client.read()`` raises ``TypeError``,
    ``ValueError`` or ``ConfigurationError``.
    """
    file_directory = SAMPLE_DIRECTORY.joinpath(file_format)
    file_path = str(file_directory.joinpath(f"{filename}.{extension}"))
    configs = {
        "dataset_name": "test",
        "format": wrong_format,
        "url": file_path,
        "provider": {"storage": "local"},
    }
    client = Client(**configs)
    with pytest.raises((TypeError, ValueError, ConfigurationError)):
        # The result of read() is materialized with list() so it is fully consumed inside the raises block.
        list(client.read())


def run_load_dataframes(config, expected_columns=10, expected_rows=42):
df_list = SourceFile.load_dataframes(config=config, logger=AirbyteLogger(), skip_data=False)
assert len(df_list) == 1 # Properly load 1 DataFrame
Expand Down
19 changes: 16 additions & 3 deletions airbyte-integrations/connectors/source-file/source_file/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from airbyte_cdk.sources import Source

from .client import Client
from .client import Client, ConfigurationError
from .utils import dropbox_force_download


Expand Down Expand Up @@ -96,12 +96,25 @@ def check(self, logger, config: Mapping) -> AirbyteConnectionStatus:
"""
config = self._validate_and_transform(config)
client = self._get_client(config)
logger.info(f"Checking access to {client.reader.full_url}...")
source_url = client.reader.full_url
artem1205 marked this conversation as resolved.
Show resolved Hide resolved
logger.info(f"Checking access to {source_url}...")
if "docs.google.com/spreadsheets" in source_url:
reason = f"Failed to load {source_url}: please use the Official Google Sheets Source connector"
logger.error(reason)
return AirbyteConnectionStatus(status=Status.FAILED, message=reason)
try:
with client.reader.open():
list(client.streams)
return AirbyteConnectionStatus(status=Status.SUCCEEDED)
except (TypeError, ValueError, ConfigurationError) as err:
reason = (
f"Failed to load {source_url}\n Please check File Format and Reader Options are set correctly"
f"\n{repr(err)}\n{traceback.format_exc()}"
)
logger.error(reason)
return AirbyteConnectionStatus(status=Status.FAILED, message=reason)
except Exception as err:
reason = f"Failed to load {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
reason = f"Failed to load {source_url}: {repr(err)}\n{traceback.format_exc()}"
logger.error(reason)
return AirbyteConnectionStatus(status=Status.FAILED, message=reason)

Expand Down
3 changes: 2 additions & 1 deletion docs/integrations/sources/file.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ In order to read large files from a remote location, this connector uses the [sm
## Changelog

| Version | Date | Pull Request | Subject |
| ------- | ---------- | -------------------------------------------------------- | -------------------------------------------------------- |
|---------|------------| -------------------------------------------------------- |----------------------------------------------------------|
| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format |
| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link |
| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. |
| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` |
Expand Down