From b270becc48c5d6d314d34e7a800eb0c710efaad5 Mon Sep 17 00:00:00 2001
From: tdruez <489057+tdruez@users.noreply.github.com>
Date: Mon, 8 Feb 2021 10:08:17 +0400
Subject: [PATCH] Remove the dependency on Metaflow #82 (#89)

* Remove the dependency on Metaflow #82

Signed-off-by: Thomas Druez

* Unify Pipeline subclasses name #82

Signed-off-by: Thomas Druez

* Move logging to the base Pipeline class #82

Signed-off-by: Thomas Druez

* Add support for exception during Pipeline execution #82

Signed-off-by: Thomas Druez

* Add entry in the CHANGELOG #82

Signed-off-by: Thomas Druez

* Update the profiling method to the new log format #82

Signed-off-by: Thomas Druez

* Replace indent function by built-in Python textwrap.indent #82

Signed-off-by: Thomas Druez
---
 CHANGELOG.rst                                 |   6 +
 Makefile                                      |   5 +-
 docs/scanpipe-pipelines.rst                   |   6 +-
 docs/scanpipe-pipes.rst                       |  10 +-
 etc/requirements/base.txt                     |   3 -
 etc/requirements/dev.txt                      |   1 +
 scancodeio/settings/base.py                   |  12 +-
 scanpipe/api/serializers.py                   |   5 -
 scanpipe/api/views.py                         |  21 +--
 scanpipe/apps.py                              |   9 --
 scanpipe/forms.py                             |   1 -
 scanpipe/logging.py                           | 101 ------------
 .../management/commands/delete-project.py     |   1 -
 scanpipe/management/commands/graph.py         |  45 +++++-
 scanpipe/management/commands/run.py           |  25 +--
 scanpipe/migrations/0003_remove_run_run_id.py |  17 ++
 scanpipe/models.py                            |  55 +++----
 scanpipe/pipelines/__init__.py                | 149 ++++++------------
 scanpipe/pipelines/docker.py                  |  79 +++-------
 scanpipe/pipelines/load_inventory.py          |  34 +---
 scanpipe/pipelines/root_filesystems.py        |  63 ++------
 scanpipe/pipelines/scan_codebase.py           |  45 +----
 .../pipes/{utilities.py => compliance.py}     |  22 +--
 scanpipe/pipes/scancode.py                    |   1 -
 scanpipe/tasks.py                             |  18 +--
 scanpipe/templates/scanpipe/base.html         |   1 +
 .../scanpipe/includes/run_modal.html          |  19 +--
 scanpipe/tests/pipelines/do_nothing.py        |  40 +++++
 scanpipe/tests/pipelines/raise_exception.py   |  34 ++++
 scanpipe/tests/test_api.py                    |  28 +---
 scanpipe/tests/test_commands.py               |  36 ++---
 scanpipe/tests/test_models.py                 |  57 +++---
 scanpipe/tests/test_pipelines.py              | 122 +++++++-------
 scanpipe/tests/test_tasks.py                  |  21 +--
 scanpipe/views.py                             |   3 +-
 35 files changed, 409 insertions(+), 686 deletions(-)
 delete mode 100644 scanpipe/logging.py
 create mode 100644 scanpipe/migrations/0003_remove_run_run_id.py
 rename scanpipe/pipes/{utilities.py => compliance.py} (76%)
 create mode 100644 scanpipe/tests/pipelines/do_nothing.py
 create mode 100644 scanpipe/tests/pipelines/raise_exception.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b146e29d6..0a1906b1e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,12 @@
 // Release notes
 // -------------
 
+### v1.1.0 (unreleased)
+
+- Remove the dependency on Metaflow
+  WARNING: The new Pipelines syntax is not backward compatible with v1.0.x
+  https://github.com/nexB/scancode.io/issues/82
+
 ### v1.0.7 (2021-02-01)
 
 - Add user interface to manage Projects from a web browser
diff --git a/Makefile b/Makefile
index 2554b72a4..20e6a8446 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,6 @@ conf:
 	@echo "-> Configure the Python venv and install dependencies"
 	${PYTHON_EXE} -m venv .
 	@${ACTIVATE} pip install -r etc/requirements/base.txt
-	@${ACTIVATE} pip install --editable .
 	# Workaround https://github.com/python/typing/issues/573#issuecomment-405986724
 	@${ACTIVATE} pip uninstall --yes typing
 
@@ -68,13 +67,13 @@ check:
 	@echo "-> Run pycodestyle (PEP8) validation"
 	@${ACTIVATE} pycodestyle --max-line-length=88 --exclude=lib,thirdparty,docs,bin,migrations,settings,data,pipelines,var .
@echo "-> Run isort imports ordering validation" - @${ACTIVATE} isort --recursive --check-only . + @${ACTIVATE} isort --check-only . @echo "-> Run black validation" @${ACTIVATE} black --check ${BLACK_ARGS} isort: @echo "-> Apply isort changes to ensure proper imports ordering" - bin/isort --recursive --apply . + bin/isort . black: @echo "-> Apply black code formatter" diff --git a/docs/scanpipe-pipelines.rst b/docs/scanpipe-pipelines.rst index 4d5af92ad..52202c5a6 100644 --- a/docs/scanpipe-pipelines.rst +++ b/docs/scanpipe-pipelines.rst @@ -10,17 +10,17 @@ Pipeline Base Class Docker Image Analysis --------------------- -.. autoclass:: scanpipe.pipelines.docker.DockerPipeline() +.. autoclass:: scanpipe.pipelines.docker.Docker() :members: Load Inventory From Scan ------------------------ -.. autoclass:: scanpipe.pipelines.load_inventory.LoadInventoryFromScanCodeScan() +.. autoclass:: scanpipe.pipelines.load_inventory.LoadInventory() :members: Root Filesystem Analysis ------------------------ -.. autoclass:: scanpipe.pipelines.root_filesystems.RootfsPipeline() +.. autoclass:: scanpipe.pipelines.root_filesystems.RootFS() :members: Scan Codebase diff --git a/docs/scanpipe-pipes.rst b/docs/scanpipe-pipes.rst index b959020d4..4c83e8977 100644 --- a/docs/scanpipe-pipes.rst +++ b/docs/scanpipe-pipes.rst @@ -18,6 +18,11 @@ Codebase .. automodule:: scanpipe.pipes.codebase :members: +Compliance +---------- +.. automodule:: scanpipe.pipes.compliance + :members: + Debian ------ .. automodule:: scanpipe.pipes.debian @@ -44,8 +49,3 @@ ScanCode -------- .. automodule:: scanpipe.pipes.scancode :members: - -Utilities ---------- -.. automodule:: scanpipe.pipes.utilities - :members: diff --git a/etc/requirements/base.txt b/etc/requirements/base.txt index 1d5dff230..863d07b50 100644 --- a/etc/requirements/base.txt +++ b/etc/requirements/base.txt @@ -26,9 +26,6 @@ kombu==4.6.11 # WSGI server gunicorn==20.0.4 -# Metaflow -metaflow==2.2.6 - # Docker container_inspector==3.1.2 diff --git a/etc/requirements/dev.txt b/etc/requirements/dev.txt index 48cfc536b..8fca2ce84 100644 --- a/etc/requirements/dev.txt +++ b/etc/requirements/dev.txt @@ -3,6 +3,7 @@ # Code validation pycodestyle==2.6.0 black==20.8b1 +isort==5.7.0 # Documentation Sphinx==3.4.3 diff --git a/scancodeio/settings/base.py b/scancodeio/settings/base.py index 6240adbd3..8e4a222e7 100644 --- a/scancodeio/settings/base.py +++ b/scancodeio/settings/base.py @@ -129,6 +129,9 @@ }, ] +# True if running tests through `./manage test` +IS_TESTS = "test" in sys.argv + # Logging LOGGING = { @@ -144,15 +147,16 @@ }, "loggers": { "scanner.tasks": { - "handlers": ["console"], + "handlers": ["null"] if IS_TESTS else ["console"], + "level": env.str("DJANGO_LOG_LEVEL", "INFO"), + }, + "scanpipe.pipelines": { + "handlers": ["null"] if IS_TESTS else ["console"], "level": env.str("DJANGO_LOG_LEVEL", "INFO"), }, }, } -# True if running tests through `./manage test` -IS_TESTS = "test" in sys.argv - # Instead of sending out real emails the console backend just writes the emails # that would be sent to the standard output. 
 EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"
diff --git a/scanpipe/api/serializers.py b/scanpipe/api/serializers.py
index c1f14b564..617ac4c0c 100644
--- a/scanpipe/api/serializers.py
+++ b/scanpipe/api/serializers.py
@@ -56,7 +56,6 @@ class RunSerializer(SerializerExcludeFieldsMixin, serializers.ModelSerializer):
     project = serializers.HyperlinkedRelatedField(
         view_name="project-detail", read_only=True
     )
-    task_output = serializers.SerializerMethodField()
 
     class Meta:
         model = Run
@@ -66,7 +65,6 @@ class Meta:
             "description",
             "project",
             "uuid",
-            "run_id",
             "created_date",
             "task_id",
             "task_start_date",
@@ -77,9 +75,6 @@ class Meta:
             "execution_time",
         ]
 
-    def get_task_output(self, run):
-        return run.task_output.split("\n")[1:]
-
 
 class ProjectSerializer(ExcludeFromListViewMixin, serializers.ModelSerializer):
     pipeline = serializers.ChoiceField(
diff --git a/scanpipe/api/views.py b/scanpipe/api/views.py
index 6916602f9..e95cb4929 100644
--- a/scanpipe/api/views.py
+++ b/scanpipe/api/views.py
@@ -40,7 +40,8 @@
 from scanpipe.models import Project
 from scanpipe.models import ProjectError
 from scanpipe.models import Run
-from scanpipe.pipelines import get_pipeline_description
+from scanpipe.pipelines import get_pipeline_class
+from scanpipe.pipelines import get_pipeline_graph
 from scanpipe.views import project_results_json_response
 
 scanpipe_app_config = apps.get_app_config("scanpipe")
@@ -92,7 +93,8 @@ def pipelines(self, request, *args, **kwargs):
         for location, name in scanpipe_app_config.pipelines:
             data[name] = {
                 "location": location,
-                "description": get_pipeline_description(location).split("\n"),
+                "description": get_pipeline_class(location).get_doc(),
+                "steps": get_pipeline_graph(location),
             }
         return Response(data)
 
@@ -188,18 +190,3 @@ def start_pipeline(self, request, *args, **kwargs):
         transaction.on_commit(run.run_pipeline_task_async)
 
         return Response({"status": f"Pipeline {run.pipeline} started."})
-
-    @action(detail=True, methods=["get"])
-    def resume_pipeline(self, request, *args, **kwargs):
-        run = self.get_object()
-
-        if run.task_succeeded:
-            message = {"status": "Cannot resume a successful pipeline run."}
-            return Response(message, status=status.HTTP_400_BAD_REQUEST)
-        elif not run.task_start_date:
-            message = {"status": "Cannot resume never started pipeline run."}
-            return Response(message, status=status.HTTP_400_BAD_REQUEST)
-
-        transaction.on_commit(run.resume_pipeline_task_async)
-
-        return Response({"status": f"Pipeline {run.pipeline} resumed."})
diff --git a/scanpipe/apps.py b/scanpipe/apps.py
index f7c020316..0a684345d 100644
--- a/scanpipe/apps.py
+++ b/scanpipe/apps.py
@@ -25,9 +25,6 @@
 from django.apps import AppConfig
 from django.utils.translation import gettext_lazy as _
 
-from scanpipe.logging import RunLogger
-from scanpipe.logging import extra_logging
-
 dot_py_suffix = ".py"
 
 
@@ -58,12 +55,6 @@ def ready(self):
                 name = remove_dot_py_suffix(child.name)
                 self.pipelines.append((location, name))
 
-        # Decorates the default metaflow logger to capture log messages
-        # Warning: This import cannot be moved outside this method
-        from metaflow import cli
-
-        cli.logger = extra_logging(cli.logger, RunLogger())
-
     def is_valid(self, pipeline):
         """
         Return True if the pipeline is valid and available.
diff --git a/scanpipe/forms.py b/scanpipe/forms.py
index 6f9fdf6a1..8dd44ba48 100644
--- a/scanpipe/forms.py
+++ b/scanpipe/forms.py
@@ -21,7 +21,6 @@
 # Visit https://github.com/nexB/scancode.io for support and download.
 
 from django import forms
-from django.db import transaction
 
 import django_filters
 
diff --git a/scanpipe/logging.py b/scanpipe/logging.py
deleted file mode 100644
index 39473716c..000000000
--- a/scanpipe/logging.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-#
-# http://nexb.com and https://github.com/nexB/scancode.io
-# The ScanCode.io software is licensed under the Apache License version 2.0.
-# Data generated with ScanCode.io is provided as-is without warranties.
-# ScanCode is a trademark of nexB Inc.
-#
-# You may not use this software except in compliance with the License.
-# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software distributed
-# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
-# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
-# ScanCode.io should be considered or used as legal advice. Consult an Attorney
-# for any legal advice.
-#
-# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
-# Visit https://github.com/nexB/scancode.io for support and download.
-
-import functools
-import re
-from contextlib import suppress
-from datetime import datetime
-from pathlib import Path
-
-from django.apps import apps
-from django.core.exceptions import MultipleObjectsReturned
-from django.core.exceptions import ObjectDoesNotExist
-from django.utils.functional import cached_property
-
-from scancodeio import WORKSPACE_LOCATION
-
-
-def extra_logging(func, extra_logger):
-    """
-    Decorator to add logging customization the default Metaflow logger.
-    This is used as a workaround since Metaflow does not provide any API to customize
-    the logging of Flow/Pipeline execution.
-    """
-
-    @functools.wraps(func)
-    def wrapper_decorator(*args, **kwargs):
-        value = func(*args, **kwargs)
-        extra_logger.log(*args, **kwargs)
-        return value
-
-    return wrapper_decorator
-
-
-class RunLogger:
-    """
-    Log messages from Pipeline execution on the Run instance `log` field.
-    If the `run_id` is not available in the message, the message is logged to the
-    `log_file`.
-
-    One caveat to this approach is that the `run_id` is set on the Run instance during
-    the "start" step of a Pipeline, making the "Task is starting." message for that
-    initial step not logged in the `Run.log` field.
- """ - - log_file = Path(WORKSPACE_LOCATION) / "scanpipe.log" - - @cached_property - def run_model(self): - return apps.get_model("scanpipe", "Run") - - @staticmethod - def get_run_id(head): - run_id_pattern = re.compile(r"\[(?P[0-9]{16})/") - match = run_id_pattern.search(head) - if match: - return match.group("run_id") - - def get_run(self, run_id): - if not run_id: - return - with suppress(ObjectDoesNotExist, MultipleObjectsReturned): - return self.run_model.objects.get(run_id=run_id) - - def log(self, body="", head="", **kwargs): - timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] - message = f"{timestamp} {head}{body}" - - run_id = self.get_run_id(head) - run = self.get_run(run_id) - if run: - self.log_to_run_instance(run, message) - else: - self.log_to_file(message) - - @staticmethod - def log_to_run_instance(run, message): - run.append_to_log(message) - run.save() - - def log_to_file(self, message): - with open(self.log_file, "a+") as f: - f.write(message + "\n") diff --git a/scanpipe/management/commands/delete-project.py b/scanpipe/management/commands/delete-project.py index 04575e2f3..cc2c0cf1f 100644 --- a/scanpipe/management/commands/delete-project.py +++ b/scanpipe/management/commands/delete-project.py @@ -20,7 +20,6 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -import shutil import sys from scanpipe.management.commands import ProjectCommand diff --git a/scanpipe/management/commands/graph.py b/scanpipe/management/commands/graph.py index b8455c6d8..10c21d975 100644 --- a/scanpipe/management/commands/graph.py +++ b/scanpipe/management/commands/graph.py @@ -22,23 +22,50 @@ import subprocess import sys +from textwrap import indent from django.core.management import CommandError from django.core.management.base import BaseCommand from scanpipe.management.commands import scanpipe_app_config -from scanpipe.pipelines import PipelineGraph from scanpipe.pipelines import get_pipeline_class from scanpipe.pipelines import get_pipeline_doc def is_graphviz_installed(): - exitcode, _ = subprocess.getstatusoutput("which dot") + exitcode = subprocess.getstatusoutput("which dot")[0] if exitcode == 0: return True return False +def pipeline_graph_dot(pipeline_class, fontname="Helvetica", shape="record"): + """ + Return the pipeline graph as DOT format compatible with Graphviz. + """ + dot_output = [f"digraph {pipeline_class.__name__} {{", "rankdir=TB;"] + + edges = [] + nodes = [] + steps = pipeline_class.steps + step_count = len(steps) + + for index, step in enumerate(steps, start=1): + step_name = step.__name__ + edges.append( + f'"{step_name}"' + f'[label=<{step_name}> fontname="{fontname}" shape="{shape}"];' + ) + if index < step_count: + next_step = steps[index] + nodes.append(f"{step_name} -> {next_step.__name__};") + + dot_output.extend(edges) + dot_output.extend(nodes) + dot_output.append("}") + return "\n".join(dot_output) + + class Command(BaseCommand): help = "Generate pipeline graph with Graphviz." 
@@ -60,7 +87,8 @@ def handle(self, *pipelines, **options):
         if options["list"]:
             for location, _ in scanpipe_app_config.pipelines:
                 self.stdout.write("- " + self.style.SUCCESS(location))
-                self.stdout.write(get_pipeline_doc(location), ending="\n\n")
+                pipeline_doc = get_pipeline_doc(location)
+                self.stdout.write(indent(pipeline_doc, " "), ending="\n\n")
             sys.exit(0)
 
         if not is_graphviz_installed():
@@ -79,17 +107,18 @@ def handle(self, *pipelines, **options):
                     self.style.ERROR(f"{pipeline_location} is not valid.")
                 )
                 sys.exit(1)
-            pipeline_graph = PipelineGraph(pipeline_class)
-            outputs.append(self.generate_graph(pipeline_graph, options.get("output")))
+
+            output_directory = options.get("output")
+            outputs.append(self.generate_graph_png(pipeline_class, output_directory))
 
         separator = "\n - "
         msg = f"Graph(s) generated:{separator}" + separator.join(outputs)
         self.stdout.write(self.style.SUCCESS(msg))
 
     @staticmethod
-    def generate_graph(pipeline_graph, output_directory):
-        output_dot = pipeline_graph.output_dot(simplify=True)
-        output_location = f"{pipeline_graph.name}.png"
+    def generate_graph_png(pipeline_class, output_directory):
+        output_dot = pipeline_graph_dot(pipeline_class)
+        output_location = f"{pipeline_class.__name__}.png"
         if output_directory:
             output_location = f"{output_directory}/{output_location}"
         dot_cmd = f'echo "{output_dot}" | dot -Tpng -o {output_location}'
diff --git a/scanpipe/management/commands/run.py b/scanpipe/management/commands/run.py
index f497ca193..064a5b151 100644
--- a/scanpipe/management/commands/run.py
+++ b/scanpipe/management/commands/run.py
@@ -30,33 +30,18 @@
 class Command(ProjectCommand):
     help = "Run pipelines of a project."
 
-    def add_arguments(self, parser):
-        super().add_arguments(parser)
-        parser.add_argument(
-            "--resume",
-            action="store_true",
-            help="Resume the latest failed pipeline execution.",
-        )
-
     def handle(self, *args, **options):
         super().handle(*args, **options)
 
-        if options["resume"]:
-            action = "resume"
-            run = self.project.get_latest_failed_run()
-            task_function = "resume_pipeline_task_async"
-        else:
-            action = "run"
-            run = self.project.get_next_run()
-            task_function = "run_pipeline_task_async"
+        run = self.project.get_next_run()
 
         if not run:
-            raise CommandError(f"No pipelines to {action} on project {self.project}")
-
-        self.stdout.write(f"Pipeline {run.pipeline} {action} in progress...")
-        getattr(run, task_function)()
+            raise CommandError(f"No pipelines to run on project {self.project}")
 
+        self.stdout.write(f"Pipeline {run.pipeline} run in progress...")
+        run.run_pipeline_task_async()
         run.refresh_from_db()
+
         if run.task_succeeded:
             msg = f"{run.pipeline} successfully executed on project {self.project}"
             self.stdout.write(self.style.SUCCESS(msg))
 
diff --git a/scanpipe/migrations/0003_remove_run_run_id.py b/scanpipe/migrations/0003_remove_run_run_id.py
new file mode 100644
index 000000000..2162b6d47
--- /dev/null
+++ b/scanpipe/migrations/0003_remove_run_run_id.py
@@ -0,0 +1,17 @@
+# Generated by Django 3.1.5 on 2021-02-04 07:18
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('scanpipe', '0002_run_id_and_log'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='run',
+            name='run_id',
+        ),
+    ]
diff --git a/scanpipe/models.py b/scanpipe/models.py
index 1697664ff..93c24ed8a 100644
--- a/scanpipe/models.py
+++ b/scanpipe/models.py
@@ -20,6 +20,7 @@
 # ScanCode.io is a free software code scanning tool from nexB Inc. and others.
 # Visit https://github.com/nexB/scancode.io for support and download.
 
+import re
 import shutil
 import traceback
 import uuid
@@ -44,6 +45,7 @@
 from scanpipe.apps import remove_dot_py_suffix
 from scanpipe.packagedb_models import AbstractPackage
 from scanpipe.packagedb_models import AbstractResource
+from scanpipe.pipelines import get_pipeline_class
 from scanpipe.pipelines import get_pipeline_doc
 
 
@@ -496,10 +498,13 @@ def failed(self):
 
 
 class Run(UUIDPKModel, ProjectRelatedModel, AbstractTaskFieldsModel):
+    """
+    The Database representation of a Pipeline execution.
+    """
+
     pipeline = models.CharField(max_length=1024)
     created_date = models.DateTimeField(auto_now_add=True, db_index=True)
     description = models.TextField(blank=True)
-    run_id = models.CharField(max_length=16, blank=True, editable=False)
     log = models.TextField(blank=True, editable=False)
 
     objects = RunQuerySet.as_manager()
@@ -513,8 +518,12 @@ def __str__(self):
     def run_pipeline_task_async(self):
         tasks.run_pipeline_task.apply_async(args=[self.pk], queue="default")
 
-    def resume_pipeline_task_async(self):
-        tasks.run_pipeline_task.apply_async(args=[self.pk, True], queue="default")
+    @property
+    def pipeline_class(self):
+        return get_pipeline_class(self.pipeline)
+
+    def make_pipeline_instance(self):
+        return self.pipeline_class(self)
 
     @property
     def task_succeeded(self):
@@ -544,30 +553,23 @@ def profile(self, print_results=False):
         if not self.task_succeeded:
             return
 
+        pattern = re.compile(r"Step \[(?P
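
Editor's note on the new pipeline syntax: the CHANGELOG entry above warns that the post-Metaflow Pipelines syntax is not backward compatible with v1.0.x, but the patch is cut off before the new `scanpipe/pipelines/__init__.py` and the test pipelines it adds. The sketch below is a rough, hypothetical illustration only, not the verbatim content of this PR. It is inferred from the code that is visible here: a plain `Pipeline` subclass exposing an ordered `steps` tuple, which `pipeline_graph_dot()` walks to build the Graphviz output and which `Run.make_pipeline_instance()` instantiates with the run. Class and step names are made up for the example.

# Hypothetical sketch of a Metaflow-free pipeline (illustration only; check
# scanpipe/pipelines/__init__.py and scanpipe/tests/pipelines/do_nothing.py
# in this PR for the real definitions and base-class API).
from scanpipe.pipelines import Pipeline


class DoNothing(Pipeline):
    """
    A pipeline that does nothing, in two steps.
    """

    def step1(self):
        """First step: no operation."""

    def step2(self):
        """Second step: no operation."""

    # An ordered tuple of step callables. The new graph command reads this
    # attribute to produce the DOT graph, and the base Pipeline class is
    # expected to execute the steps in this order, emitting the
    # "Step [...] completed in ... seconds" log lines that Run.profile() parses.
    steps = (
        step1,
        step2,
    )

With this shape there is no Metaflow FlowSpec, decorator, or `run_id` involved: a Run row simply resolves its pipeline class by module location and executes the steps in sequence, which is why the `run_id` field and the resume machinery are removed throughout this patch.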