-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(ingest): use branch info when cloning git repos (#6937)
- Loading branch information
Showing
4 changed files
with
65 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ class GitHubReference(ConfigModel): | |
) | ||
branch: str = Field( | ||
"main", | ||
description="Branch on which your files live by default. Typically main or master.", | ||
description="Branch on which your files live by default. Typically main or master. This can also be a commit hash.", | ||
) | ||
base_url: str = Field( | ||
"https://github.com", | ||
|
@@ -73,3 +73,15 @@ def auto_infer_from_repo(cls, v: Optional[str], values: Dict[str, Any]) -> str: | |
if v is None: | ||
return f"git@github.com:{values.get('repo')}" | ||
return v | ||
|
||
@property | ||
def branch_for_clone(self) -> Optional[str]: | ||
# If branch was manually set, we should use it. Otherwise return None. | ||
# We do this because we want to use the default branch unless they override it. | ||
# While our default for branch is "main", they could be using "master" or something else. | ||
# It's ok if the URLs we generate are slightly incorrect, but changing branch to be | ||
# required would be a breaking change. | ||
|
||
if "branch" in self.__fields_set__: | ||
return self.branch | ||
return None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 42 additions & 23 deletions
65
metadata-ingestion/tests/integration/git/test_git_clone.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,31 +1,50 @@ | ||
import os | ||
|
||
import pytest | ||
from pydantic import SecretStr | ||
|
||
from datahub.configuration.github import GitHubInfo | ||
from datahub.ingestion.source.git.git_import import GitClone | ||
|
||
LOOKML_TEST_SSH_KEY = os.environ.get("DATAHUB_LOOKML_GIT_TEST_SSH_KEY") | ||
|
||
def test_git_clone(pytestconfig, tmp_path): | ||
|
||
@pytest.mark.skipif( | ||
LOOKML_TEST_SSH_KEY is None, | ||
reason="DATAHUB_LOOKML_GIT_TEST_SSH_KEY env variable is not configured", | ||
) | ||
def test_git_clone(tmp_path): | ||
git_clone = GitClone(str(tmp_path)) | ||
secret_env_variable = "DATAHUB_LOOKML_GIT_TEST_SSH_KEY" | ||
if os.environ.get(secret_env_variable) is not None: | ||
secret_key = SecretStr(os.environ.get(secret_env_variable)) # type: ignore | ||
checkout_dir = git_clone.clone( | ||
ssh_key=secret_key, | ||
repo_url="git@github.com:acryldata/long-tail-companions-looker", | ||
) | ||
assert os.path.exists(checkout_dir) | ||
assert set(os.listdir(checkout_dir)) == set( | ||
[ | ||
".datahub", | ||
"models", | ||
"README.md", | ||
".github", | ||
".git", | ||
"views", | ||
] | ||
) | ||
else: | ||
print( | ||
"Skipping test as env variable DATAHUB_LOOKML_GIT_TEST_SSH_KEY is not configured" | ||
) | ||
secret_key = SecretStr(LOOKML_TEST_SSH_KEY) if LOOKML_TEST_SSH_KEY else None | ||
|
||
checkout_dir = git_clone.clone( | ||
ssh_key=secret_key, | ||
repo_url="git@github.com:acryldata/long-tail-companions-looker", | ||
branch="d380a2b777ec6f4653626f39c68dba85893faa74", | ||
) | ||
assert os.path.exists(checkout_dir) | ||
assert set(os.listdir(checkout_dir)) == set( | ||
[ | ||
".datahub", | ||
"models", | ||
"README.md", | ||
".github", | ||
".git", | ||
"views", | ||
"manifest_lock.lkml", | ||
"manifest.lkml", | ||
] | ||
) | ||
|
||
|
||
def test_github_branch(): | ||
config = GitHubInfo( | ||
repo="owner/repo", | ||
) | ||
assert config.branch_for_clone is None | ||
|
||
config = GitHubInfo( | ||
repo="owner/repo", | ||
branch="main", | ||
) | ||
assert config.branch_for_clone == "main" |