diff --git a/.fides/dataset.yml b/.fides/dataset.yml index 0843bd084ae..3c9cea741fe 100644 --- a/.fides/dataset.yml +++ b/.fides/dataset.yml @@ -1,6 +1,7 @@ dataset: - fides_key: public organization_fides_key: default_organization + tags: null name: public description: The dataset responsible for storing all of the data related to a fidesctl instance. @@ -198,14 +199,14 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at + - name: tags description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags + - name: updated_at description: null data_categories: - system.operations @@ -267,14 +268,14 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at + - name: tags description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags + - name: updated_at description: null data_categories: - system.operations @@ -345,14 +346,14 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at + - name: tags description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags + - name: updated_at description: null data_categories: - system.operations @@ -450,14 +451,14 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at + - name: tags description: null data_categories: - system.operations data_qualifier: 
aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags + - name: updated_at description: null data_categories: - system.operations @@ -585,22 +586,22 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: third_country_transfers + - name: tags description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at - description: The timestamp of when the row was last updated + - name: third_country_transfers + description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags - description: null + - name: updated_at + description: The timestamp of when the row was last updated data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified @@ -878,15 +879,15 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at - description: The timestamp of when the row was last updated + - name: tags + description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags - description: null + - name: updated_at + description: The timestamp of when the row was last updated data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified @@ -947,15 +948,15 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at - description: The timestamp of when the row was last updated + - name: tags + 
description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags - description: null + - name: updated_at + description: The timestamp of when the row was last updated data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified @@ -1009,15 +1010,15 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at - description: The timestamp of when the row was last updated + - name: tags + description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags - description: null + - name: updated_at + description: The timestamp of when the row was last updated data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified @@ -1143,22 +1144,22 @@ dataset: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: third_country_transfers + - name: tags description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: updated_at - description: The timestamp of when the row was last updated + - name: third_country_transfers + description: null data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: null fields: null - - name: tags - description: null + - name: updated_at + description: The timestamp of when the row was last updated data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/.fides/policy.yml 
b/.fides/policy.yml index a3b6616d1fd..545f099d4cd 100644 --- a/.fides/policy.yml +++ b/.fides/policy.yml @@ -1,6 +1,7 @@ policy: - fides_key: fidesctl_policy organization_fides_key: default_organization + tags: null name: Fidesctl Policy description: The main privacy policy for Fidesctl. rules: @@ -20,6 +21,7 @@ policy: data_qualifier: aggregated - fides_key: data_sharing_policy organization_fides_key: default_organization + tags: null name: Data Sharing description: The privacy policy that governs sharing of data with third parties. rules: diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cd1ab438c4..e433da4440c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,8 @@ The types of changes are: * System scanning results: AWS systems are stored and can be selected for review * Added Cypress for testing [713](https://github.com/ethyca/fides/pull/833) * CustomInput type "password" with show/hide icon. -* Sync CLI command now checks for untracked/unstaged files in the manifests dir [#869](https://github.com/ethyca/fides/pull/869) +* Pull CLI command now checks for untracked/unstaged files in the manifests dir [#869](https://github.com/ethyca/fides/pull/869) +* Pull CLI command has a flag to pull missing files from the server [#895](https://github.com/ethyca/fides/pull/895) * Add Okta support to the `/generate` endpoint [#842](https://github.com/ethyca/fides/pull/842) * Add db support to `/generate` endpoint [849](https://github.com/ethyca/fides/pull/849) * Add BigQuery support for the `generate` cli command [#814](https://github.com/ethyca/fides/pull/814) diff --git a/src/fidesctl/cli/__init__.py b/src/fidesctl/cli/__init__.py index 6343c3504e4..2a68caaa0d0 100644 --- a/src/fidesctl/cli/__init__.py +++ b/src/fidesctl/cli/__init__.py @@ -11,7 +11,7 @@ from fidesctl.core.config import get_config from .commands.annotate import annotate -from .commands.core import apply, evaluate, parse, sync +from .commands.core import apply, evaluate, parse, pull from .commands.crud 
import delete, get, ls from .commands.db import database from .commands.export import export @@ -34,7 +34,7 @@ get, ls, status, - sync, + pull, ] API_COMMAND_DICT = {command.name or str(command): command for command in API_COMMANDS} ALL_COMMANDS = API_COMMANDS + LOCAL_COMMANDS diff --git a/src/fidesctl/cli/commands/core.py b/src/fidesctl/cli/commands/core.py index 1ba68988ddb..01ca21d8d59 100644 --- a/src/fidesctl/cli/commands/core.py +++ b/src/fidesctl/cli/commands/core.py @@ -1,4 +1,6 @@ """Contains all of the core CLI commands for Fidesctl.""" +from typing import Optional + import click from fidesctl.cli.options import ( @@ -12,7 +14,7 @@ from fidesctl.core import audit as _audit from fidesctl.core import evaluate as _evaluate from fidesctl.core import parse as _parse -from fidesctl.core import sync as _sync +from fidesctl.core import pull as _pull from fidesctl.core.utils import git_is_dirty @@ -138,24 +140,34 @@ def parse(ctx: click.Context, manifests_dir: str, verbose: bool = False) -> None @click.command() @click.pass_context @manifests_dir_argument +@click.option( + "--all-resources", + "-a", + default=None, + help="Pulls all locally missing resources from the server into this file.", +) @with_analytics -def sync(ctx: click.Context, manifests_dir: str) -> None: +def pull(ctx: click.Context, manifests_dir: str, all_resources: Optional[str]) -> None: """ Update local resource files by their fides_key to match their server versions. - Aborts the sync if there are unstaged or untracked files in the manifests dir. + Additionally, with the "--all-resources" flag, resources missing locally are pulled + from the server into the given file. + + The pull is aborted if there are unstaged or untracked files in the manifests dir. 
""" config = ctx.obj["CONFIG"] - # Do this to validate the manifests since they won't get parsed during the sync process + # Do this to validate the manifests since they won't get parsed during the pull process _parse.parse(manifests_dir) if git_is_dirty(manifests_dir): echo_red( - f"There are unstaged changes in your manifest directory: '{manifests_dir}' \nAborting sync!" + f"There are unstaged changes in your manifest directory: '{manifests_dir}' \nAborting pull!" ) raise SystemExit(1) - _sync.sync( + _pull.pull( url=config.cli.server_url, manifests_dir=manifests_dir, headers=config.user.request_headers, + all_resources=all_resources, ) diff --git a/src/fidesctl/cli/commands/util.py b/src/fidesctl/cli/commands/util.py index 01878428a93..064e54ab0b7 100644 --- a/src/fidesctl/cli/commands/util.py +++ b/src/fidesctl/cli/commands/util.py @@ -7,7 +7,12 @@ from fideslog.sdk.python.utils import OPT_OUT_COPY, OPT_OUT_PROMPT import fidesctl -from fidesctl.cli.utils import check_server, send_init_analytics, with_analytics +from fidesctl.cli.utils import ( + FIDESCTL_ASCII_ART, + check_server, + send_init_analytics, + with_analytics, +) from fidesctl.core.utils import echo_green @@ -45,7 +50,7 @@ def init(ctx: click.Context, fides_directory_location: str) -> None: "cli": {"server_protocol", "server_host", "server_port", "analytics_id"}, "user": {"analytics_opt_out"}, } - + click.echo(FIDESCTL_ASCII_ART) click.echo("Initializing Fidesctl...") separate() diff --git a/src/fidesctl/cli/utils.py b/src/fidesctl/cli/utils.py index 9f26b0e6553..0a2cc433578 100644 --- a/src/fidesctl/cli/utils.py +++ b/src/fidesctl/cli/utils.py @@ -34,6 +34,15 @@ from fidesctl.core.config.utils import get_config_from_file, update_config_file from fidesctl.core.utils import API_PREFIX, check_response, echo_green, echo_red +FIDESCTL_ASCII_ART = """ +███████╗██╗██████╗ ███████╗███████╗ ██████╗████████╗██╗ +██╔════╝██║██╔══██╗██╔════╝██╔════╝██╔════╝╚══██╔══╝██║ +█████╗ ██║██║ ██║█████╗ ███████╗██║ ██║ 
██║ +██╔══╝ ██║██║ ██║██╔══╝ ╚════██║██║ ██║ ██║ +██║ ██║██████╔╝███████╗███████║╚██████╗ ██║ ███████╗ +╚═╝ ╚═╝╚═════╝ ╚══════╝╚══════╝ ╚═════╝ ╚═╝ ╚══════╝ +""" + def check_server(cli_version: str, server_url: str, quiet: bool = False) -> None: """Runs a health check and a version check against the server.""" diff --git a/src/fidesctl/core/api_helpers.py b/src/fidesctl/core/api_helpers.py index e4c47be96f6..4c9b258f720 100644 --- a/src/fidesctl/core/api_helpers.py +++ b/src/fidesctl/core/api_helpers.py @@ -82,24 +82,32 @@ def list_server_resources( headers: Dict[str, str], resource_type: str, exclude_keys: List[str], -) -> List[FidesModel]: + raw: bool = False, +) -> Optional[Union[List[FidesModel], List[Dict]]]: """ Get a list of resources from the server and return them as parsed objects. Returns an empty list if no resources are found or if the API returns an error. """ response: Response = api.ls(url=url, resource_type=resource_type, headers=headers) - server_resources: List[FidesModel] = ( + server_resources = ( [ - parse_dict( - resource_type=resource_type, - resource=resource, - from_server=True, - ) + resource for resource in response.json() if isinstance(resource, dict) and resource["fides_key"] not in exclude_keys ] if response.status_code >= 200 and response.status_code <= 299 else [] ) + + if not raw and server_resources: + server_resources = [ + parse_dict( + resource_type=resource_type, + resource=resource_dict, + from_server=True, + ) + for resource_dict in server_resources + ] + return server_resources diff --git a/src/fidesctl/core/pull.py b/src/fidesctl/core/pull.py new file mode 100644 index 00000000000..e09a029d57e --- /dev/null +++ b/src/fidesctl/core/pull.py @@ -0,0 +1,115 @@ +"""This module handles the logic for syncing remote resource versions into their local file.""" +from typing import Dict, List, Optional + +import yaml +from fideslang.manifests import load_yaml_into_dict + +from fidesctl.cli.utils import echo_green, print_divider +from 
fidesctl.core.api_helpers import get_server_resource, list_server_resources +from fidesctl.core.utils import get_manifest_list + + +def write_manifest_file(manifest_path: str, manifest: Dict) -> None: + """ + Write a manifest file out. + """ + with open(manifest_path, "w") as manifest_file: + yaml.dump(manifest, manifest_file, sort_keys=False, indent=2) + echo_green(f"Updated manifest file written out to: '{manifest_path}'") + + +def pull_existing_resources( + manifests_dir: str, url: str, headers: Dict[str, str] +) -> List[str]: + """ + Update all of the pre-existing local resources to match their + state on the server. + """ + manifest_path_list = get_manifest_list(manifests_dir) + # Store and return the keys of resources that get pulled here. + existing_keys: List[str] = [] + + print_divider() + for manifest_path in manifest_path_list: + print(f"Pulling file: '{manifest_path}'...") + manifest = load_yaml_into_dict(manifest_path) + updated_manifest = {} + + for resource_type in manifest.keys(): + resource_list = manifest[resource_type] + updated_resource_list = [] + + for resource in resource_list: + fides_key = resource["fides_key"] + existing_keys.append(fides_key) + + server_resource = get_server_resource( + url, resource_type, fides_key, headers, raw=True + ) + + if server_resource: + updated_resource_list.append(server_resource) + print( + f" - {resource_type.capitalize()} with fides_key: {fides_key} is being updated from the server..." + ) + else: + updated_resource_list.append(resource) + + updated_manifest[resource_type] = updated_resource_list + write_manifest_file(manifest_path, updated_manifest) + print_divider() + + return existing_keys + + +def pull_missing_resources( + manifest_path: str, url: str, headers: Dict[str, str], existing_keys: List[str] +) -> bool: + """ + Writes all "system", "dataset" and "policy" resources out locally + that currently only exist on the server. 
+ """ + + print(f"Writing out new resources to file: '{manifest_path}'...") + resources_to_pull = ["system", "dataset", "policy"] + resource_manifest = { + resource: list_server_resources( + url=url, + headers=headers, + resource_type=resource, + exclude_keys=existing_keys, + raw=True, + ) + for resource in resources_to_pull + } + + # Write out the resources in a file + write_manifest_file(manifest_path, resource_manifest) + print_divider() + return True + + +def pull( + manifests_dir: str, + url: str, + headers: Dict[str, str], + all_resources: Optional[str], +) -> None: + """ + If a resource in a local file has a matching resource on the server, + write the server version into the local file. + + If 'all_resources' is set, additionally pull all other server resources + into the single local file at that path. + """ + existing_keys = pull_existing_resources(manifests_dir, url, headers) + + if all_resources: + pull_missing_resources( + manifest_path=all_resources, + url=url, + headers=headers, + existing_keys=existing_keys, + ) + + echo_green("Pull complete.") diff --git a/src/fidesctl/core/sync.py b/src/fidesctl/core/sync.py deleted file mode 100644 index 98563141158..00000000000 --- a/src/fidesctl/core/sync.py +++ /dev/null @@ -1,51 +0,0 @@ -"""This module handles the logic for syncing remote resource versions into their local file.""" -from typing import Dict - -import yaml -from fideslang.manifests import load_yaml_into_dict - -from fidesctl.cli.utils import echo_green, print_divider -from fidesctl.core.api_helpers import get_server_resource -from fidesctl.core.utils import get_manifest_list - - -def sync(manifests_dir: str, url: str, headers: Dict[str, str]) -> None: - """ - If a resource in a local file has a matching resource on the server, - write out the server version into the local file. 
- """ - - manifest_path_list = get_manifest_list(manifests_dir) - - print_divider() - for manifest_path in manifest_path_list: - print(f"Syncing file: '{manifest_path}'...") - manifest = load_yaml_into_dict(manifest_path) - updated_manifest = {} - - for resource_type in manifest.keys(): - resource_list = manifest[resource_type] - updated_resource_list = [] - - for resource in resource_list: - fides_key = resource["fides_key"] - server_resource = get_server_resource( - url, resource_type, fides_key, headers, raw=True - ) - - if server_resource: - updated_resource_list.append(server_resource) - print( - f" - {resource_type.capitalize()} with fides_key: {fides_key} is being updated from the server..." - ) - else: - updated_resource_list.append(resource) - - updated_manifest[resource_type] = updated_resource_list - - with open(manifest_path, "w") as manifest_file: - yaml.dump(updated_manifest, manifest_file, sort_keys=False, indent=2) - echo_green(f"Updated manifest file written out to: '{manifest_path}'") - print_divider() - - echo_green("Sync complete.") diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 923b3ddb660..bab98da6bbc 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -2,7 +2,6 @@ import os from base64 import b64decode from json import dump, loads -from pathlib import PosixPath from typing import Generator import pytest @@ -15,6 +14,13 @@ OKTA_URL = "https://dev-78908748.okta.com" +def git_reset(change_dir: str) -> None: + """Reset the files under `change_dir` to their state at HEAD.""" + + git_session = Repo().git() + git_session.checkout("HEAD", change_dir) + + @pytest.fixture() def test_cli_runner() -> Generator: runner = CliRunner() @@ -101,21 +107,50 @@ def test_dry_diff_apply(test_config_path: str, test_cli_runner: CliRunner) -> No @pytest.mark.integration -def test_sync( - test_config_path: str, test_cli_runner: CliRunner, tmp_path: PosixPath -) -> None: - """ - Due to the fact that this command checks the real git status, a 
pytest - tmp_dir can't be used. Consequently a real directory must be tested against - and then reset. - """ - test_dir = "demo_resources/" - result = test_cli_runner.invoke(cli, ["-f", test_config_path, "sync", test_dir]) - print(result.output) - assert result.exit_code == 0 - - git_session = Repo().git() - git_session.checkout("HEAD", test_dir) +class TestPull: + def test_pull( + self, + test_config_path: str, + test_cli_runner: CliRunner, + ) -> None: + """ + Due to the fact that this command checks the real git status, a pytest + tmp_dir can't be used. Consequently a real directory must be tested against + and then reset. + """ + test_dir = ".fides/" + result = test_cli_runner.invoke(cli, ["-f", test_config_path, "pull", test_dir]) + git_reset(test_dir) + print(result.output) + assert result.exit_code == 0 + + def test_pull_all( + self, + test_config_path: str, + test_cli_runner: CliRunner, + ) -> None: + """ + Due to the fact that this command checks the real git status, a pytest + tmp_dir can't be used. Consequently a real directory must be tested against + and then reset. 
+ """ + test_dir = ".fides/" + test_file = ".fides/test_resources.yml" + result = test_cli_runner.invoke( + cli, + [ + "-f", + test_config_path, + "pull", + test_dir, + "-a", + ".fides/test_resources.yml", + ], + ) + git_reset(test_dir) + os.remove(test_file) + print(result.output) + assert result.exit_code == 0 @pytest.mark.integration diff --git a/tests/core/test_pull.py b/tests/core/test_pull.py new file mode 100644 index 00000000000..b7aa522bbfa --- /dev/null +++ b/tests/core/test_pull.py @@ -0,0 +1,23 @@ +import pytest +from git.repo import Repo + +from fidesctl.core.config import FidesctlConfig +from fidesctl.core.pull import pull_existing_resources + + +def git_reset(change_dir: str) -> None: + """Reset the files under `change_dir` to their state at HEAD.""" + + git_session = Repo().git() + git_session.checkout("HEAD", change_dir) + + +@pytest.mark.unit +def test_pull_existing_resources(test_config: FidesctlConfig) -> None: + """Pulling the existing resources returns the fides_keys that were found locally.""" + test_dir = ".fides/" + existing_keys = pull_existing_resources( + test_dir, test_config.cli.server_url, test_config.user.request_headers + ) + git_reset(test_dir) + assert len(existing_keys) > 1 diff --git a/tests/core/test_sync.py b/tests/core/test_sync.py deleted file mode 100644 index 73979aab247..00000000000 --- a/tests/core/test_sync.py +++ /dev/null @@ -1,3 +0,0 @@ -def test_sync() -> None: - """Placeholder test.""" - assert 1