From 116b8f3c2b99badfbb4d7de3dc9424df8aeab4cb Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Mon, 3 Jun 2024 11:36:53 -0700 Subject: [PATCH] Remove API for clearcode and matchcode #241 #445 #453 Signed-off-by: Jono Yang --- clearcode/api.py | 80 ---------- clearcode/tests/test_api.py | 126 --------------- docs/source/purldb/api.rst | 14 ++ matchcode/api.py | 308 ------------------------------------ matchcode/tests/test_api.py | 168 -------------------- packagedb/api.py | 82 ++++++++-- 6 files changed, 86 insertions(+), 692 deletions(-) delete mode 100644 clearcode/api.py delete mode 100644 clearcode/tests/test_api.py delete mode 100644 matchcode/api.py delete mode 100644 matchcode/tests/test_api.py diff --git a/clearcode/api.py b/clearcode/api.py deleted file mode 100644 index 42bcd6c1..00000000 --- a/clearcode/api.py +++ /dev/null @@ -1,80 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# -# ClearCode is a free software tool from nexB Inc. and others. -# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import base64 - -from django.urls import include, re_path -from rest_framework import routers -from rest_framework import serializers -from rest_framework import viewsets - -from clearcode.models import CDitem - - -class CDitemContentFieldSerializer(serializers.Field): - """ - Custom Field Serializer used to translate between Django ORM binary field and - base64-encoded string - """ - def to_representation(self, obj): - return base64.b64encode(obj).decode('utf-8') - - def to_internal_value(self, data): - return base64.b64decode(data) - - -class CDitemSerializer(serializers.HyperlinkedModelSerializer): - """ - Custom Serializer used to serialize the CDitem model - """ - content = CDitemContentFieldSerializer(required=False) - class Meta: - model = CDitem - fields = ( - 'path', - 'uuid', - 'content', - 'last_modified_date', - 'last_map_date', - 'map_error', - ) - - -class CDitemViewSet(viewsets.ModelViewSet): - """ - API endpoint that allows CDitems to be viewed. - """ - serializer_class = CDitemSerializer - lookup_field = 'uuid' - - def get_queryset(self): - last_modified_date = self.request.query_params.get('last_modified_date', None) - queryset = CDitem.objects.all() - - if last_modified_date: - queryset = CDitem.objects.modified_after(last_modified_date) - - return queryset - - -router = routers.DefaultRouter() -router.register(r'cditems', CDitemViewSet, 'cditems') - -urlpatterns = [ - re_path('^api/', include((router.urls, 'api'))), -] diff --git a/clearcode/tests/test_api.py b/clearcode/tests/test_api.py deleted file mode 100644 index 76f3feba..00000000 --- a/clearcode/tests/test_api.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# -# ClearCode is a free software tool from nexB Inc. and others. -# Visit https://github.com/nexB/clearcode-toolkit/ for support and download. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import base64 -import datetime -import gzip -import json - -from django.test import TestCase -from django.utils import timezone -from rest_framework import status -from rest_framework.test import APIClient - -from clearcode import api -from clearcode.models import CDitem - - -class CDitemSerializerTestCase(TestCase): - - def setUp(self): - self.cditem_attributes = { - 'path': 'test/path/file.json', - 'content': gzip.compress(json.dumps({'test': 'content'}).encode('utf-8')) - } - self.cditem = CDitem.objects.create(**self.cditem_attributes) - self.serializer = api.CDitemSerializer(instance=self.cditem) - self.data = self.serializer.data - - def test_contains_expected_fields(self): - self.assertCountEqual(self.data.keys(), ['path', 'uuid', 'content', 'last_modified_date', 'last_map_date', 'map_error']) - - def test_path_field_content(self): - self.assertEqual(self.data['path'], self.cditem_attributes['path']) - - def test_content_field_content(self): - decoded_test_data = base64.b64decode(self.data['content']) - self.assertEqual(decoded_test_data, self.cditem_attributes['content']) - self.assertEqual(json.loads(gzip.decompress(decoded_test_data)), {'test': 'content'}) - - def test_last_map_date_field_content(self): - self.assertIsNone(self.data['last_map_date']) - - def test_map_error_field_content(self): - self.assertIsNone(self.data['map_error']) - - -class CDitemAPITestCase(TestCase): - - def setUp(self): - self.client = APIClient() - self.test_path = 'test/path/file.json' - - self.post_test_path = 'test/post/path/file.json' - - self.test_data = {'test': 'content'} - self.test_content = gzip.compress(json.dumps(self.test_data).encode('utf-8')) - - self.cditem = CDitem.objects.create(path=self.test_path) - self.uuid = self.cditem.uuid - - def test_api_cditems_get(self): - response = self.client.get('/api/cditems/{}/'.format(self.uuid)) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.data.get('path'), self.test_path) - self.assertEqual(response.data.get('uuid'), str(self.uuid)) - - def test_api_cditems_get_list(self): - response = self.client.get('/api/cditems/') - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(1, response.data.get('count')) - - def test_api_cditems_get_list_by_last_modified_date_old_date(self): - test_date = datetime.datetime.now() - datetime.timedelta(days=1) - test_date_string = '{}-{}-{}'.format(test_date.year, test_date.month, test_date.day) - - response = self.client.get('/api/cditems/?last_modified_date={}'.format(test_date_string)) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(1, response.data.get('count')) - - def test_api_cditems_get_list_by_last_modified_date_future(self): - test_date = datetime.datetime.now() + datetime.timedelta(days=1) - test_date_string = '{}-{}-{}'.format(test_date.year, test_date.month, test_date.day) - - response = self.client.get('/api/cditems/?last_modified_date={}'.format(test_date_string)) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(0, response.data.get('count')) - - def test_api_cditems_put(self): - test_payload = { - 'path': self.test_path, - 'content': base64.b64encode(self.test_content).decode('utf-8') - } - - response = self.client.put('/api/cditems/{}/'.format(self.uuid), test_payload) - self.assertEqual(response.status_code, status.HTTP_200_OK) - - cditem = CDitem.objects.get(path=self.test_path) - self.assertEqual(cditem.data, self.test_data) - - def test_api_cditems_post(self): - test_payload = { - 'path': self.post_test_path, - 'content': base64.b64encode(self.test_content).decode('utf-8') - } - - response = self.client.post('/api/cditems/', test_payload) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.data.get('path'), self.post_test_path) - - cditem = CDitem.objects.get(path=self.post_test_path) - self.assertEqual(cditem.data, self.test_data) diff --git a/docs/source/purldb/api.rst b/docs/source/purldb/api.rst index d6aef4f5..22000294 100644 --- a/docs/source/purldb/api.rst +++ b/docs/source/purldb/api.rst @@ -916,6 +916,7 @@ Return a list of package sets and the package data of packages within ``GET /api/projects/0bbdcf88-ad07-4970-9272-7d5f4c82cc7b/`` .. code-block:: json + { "count": 8198, "next": "https://public.purldb.io/api/package_sets/?page=2", @@ -1100,3 +1101,16 @@ Return a ``git_repo`` from a standard PackageURL ``package_url``. { "git_repo": "git+https://github.com/ckeditor/ckeditor4-react.git" } + + +matching +-------- + + +.. code-block:: json + +{ + "upload_file": null, + "input_urls": [], + "webhook_url": "" +} \ No newline at end of file diff --git a/matchcode/api.py b/matchcode/api.py deleted file mode 100644 index 9810645a..00000000 --- a/matchcode/api.py +++ /dev/null @@ -1,308 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# -from django.db.models import Q -from django.forms import widgets -from django.forms.fields import MultipleChoiceField -from django_filters.filters import MultipleChoiceFilter -from django_filters.rest_framework import FilterSet -from rest_framework.decorators import action -from rest_framework.response import Response -from rest_framework.serializers import CharField -from rest_framework.serializers import FloatField -from rest_framework.serializers import HyperlinkedRelatedField -from rest_framework.serializers import ModelSerializer -from rest_framework.serializers import ReadOnlyField -from rest_framework.serializers import Serializer -from rest_framework.viewsets import ReadOnlyModelViewSet - -from matchcode_toolkit.fingerprinting import create_halohash_chunks -from matchcode_toolkit.fingerprinting import hexstring_to_binarray -from matchcode_toolkit.fingerprinting import split_fingerprint -from matchcode_toolkit.halohash import byte_hamming_distance -from matchcode.models import ExactFileIndex -from matchcode.models import ExactPackageArchiveIndex -from matchcode.models import ApproximateDirectoryContentIndex -from matchcode.models import ApproximateDirectoryStructureIndex - - -class BaseFileIndexSerializer(ModelSerializer): - sha1 = CharField(source='fingerprint') - package = HyperlinkedRelatedField( - view_name='api:package-detail', - lookup_field='uuid', - read_only=True - ) - - -class ExactFileIndexSerializer(BaseFileIndexSerializer): - class Meta: - model = ExactFileIndex - fields = ( - 'sha1', - 'package' - ) - - -class ExactPackageArchiveIndexSerializer(BaseFileIndexSerializer): - class Meta: - model = ExactPackageArchiveIndex - fields = ( - 'sha1', - 'package' - ) - - -class BaseDirectoryIndexSerializer(ModelSerializer): - fingerprint = ReadOnlyField() - package = HyperlinkedRelatedField( - view_name='api:package-detail', - lookup_field='uuid', - read_only=True - ) - -class ApproximateDirectoryContentIndexSerializer(BaseDirectoryIndexSerializer): - class Meta: - model = ApproximateDirectoryContentIndex - fields = ( - 'fingerprint', - 'package', - ) - - -class ApproximateDirectoryStructureIndexSerializer(BaseDirectoryIndexSerializer): - class Meta: - model = ApproximateDirectoryStructureIndex - fields = ( - 'fingerprint', - 'package', - ) - - -class BaseDirectoryIndexMatchSerializer(Serializer): - fingerprint = CharField() - matched_fingerprint = CharField() - package = HyperlinkedRelatedField( - view_name='api:package-detail', - lookup_field='uuid', - read_only=True - ) - similarity_score = FloatField() - - -class CharMultipleWidget(widgets.TextInput): - """ - Enables the support for `MultiValueDict` `?field=a&field=b` - reusing the `SelectMultiple.value_from_datadict()` but render as a `TextInput`. - """ - def value_from_datadict(self, data, files, name): - value = widgets.SelectMultiple().value_from_datadict(data, files, name) - if not value or value == ['']: - return '' - - return value - - def format_value(self, value): - """ - Return a value as it should appear when rendered in a template. - """ - return ', '.join(value) - - -class MultipleCharField(MultipleChoiceField): - """ - Overrides `MultipleChoiceField` to fit in `MultipleCharFilter`. - """ - widget = CharMultipleWidget - - def valid_value(self, value): - return True - - -class MultipleCharFilter(MultipleChoiceFilter): - """ - Filters on multiple values for a CharField type using `?field=a&field=b` URL syntax. - """ - field_class = MultipleCharField - - -# TODO: Think of a better name for this filter -class MultipleCharInFilter(MultipleCharFilter): - def filter(self, qs, value): - if not value: - # Even though not a noop, no point filtering if empty. - return qs - - if self.is_noop(qs, value): - return qs - - predicate = self.get_filter_predicate(value) - old_field_name = next(iter(predicate)) - new_field_name = f'{old_field_name}__in' - predicate[new_field_name] = predicate[old_field_name] - predicate.pop(old_field_name) - - q = Q(**predicate) - qs = self.get_method(qs)(q) - - return qs.distinct() if self.distinct else qs - - -class MultipleSHA1Filter(MultipleCharFilter): - """ - Overrides `MultipleCharFilter.filter()` to convert the SHA1 - into a bytearray so it can be queried - """ - def filter(self, qs, value): - if not value: - return qs - - q = Q() - for val in value: - v = hexstring_to_binarray(val) - q.add(Q(sha1=v), Q.OR) - - return qs.filter(q) - - -class MultipleFingerprintFilter(MultipleCharFilter): - """ - Overrides `MultipleCharFilter.filter()` to process fingerprint from a single - string into multiple values used for querying. - - In the BaseDirectoryIndex model, the fingerprint is stored in four chunks of - equal size, not as a single field that contains the entire fingerprint. We - must process the fingerprint into the correct parts so we can use those - parts to query the different fields. - """ - def filter(self, qs, value): - if not value: - return qs - - q = Q() - for val in value: - indexed_elements_count, bah128 = split_fingerprint(val) - chunk1, chunk2, chunk3, chunk4 = create_halohash_chunks(bah128) - q.add( - Q( - indexed_elements_count=indexed_elements_count, - chunk1=chunk1, - chunk2=chunk2, - chunk3=chunk3, - chunk4=chunk4 - ), - Q.OR - ) - - return qs.filter(q) - - -class BaseFileIndexFilterSet(FilterSet): - sha1 = MultipleSHA1Filter() - - -class ExactFileIndexFilterSet(BaseFileIndexFilterSet): - class Meta: - model = ExactFileIndex - fields = ( - 'sha1', - ) - - -class ExactPackageArchiveFilterSet(BaseFileIndexFilterSet): - class Meta: - model = ExactPackageArchiveIndex - fields = ( - 'sha1', - ) - - -class BaseDirectoryIndexFilterSet(FilterSet): - fingerprint = MultipleFingerprintFilter() - - -class ApproximateDirectoryContentFilterSet(BaseDirectoryIndexFilterSet): - class Meta: - model = ApproximateDirectoryContentIndex - fields = ( - 'fingerprint', - ) - - -class ApproximateDirectoryStructureFilterSet(BaseDirectoryIndexFilterSet): - class Meta: - model = ApproximateDirectoryStructureIndex - fields = ( - 'fingerprint', - ) - - -class BaseFileIndexViewSet(ReadOnlyModelViewSet): - lookup_field = 'sha1' - - -class ExactFileIndexViewSet(BaseFileIndexViewSet): - queryset = ExactFileIndex.objects.all() - serializer_class = ExactFileIndexSerializer - filterset_class = ExactFileIndexFilterSet - - -class ExactPackageArchiveIndexViewSet(BaseFileIndexViewSet): - queryset = ExactPackageArchiveIndex.objects.all() - serializer_class = ExactPackageArchiveIndexSerializer - filterset_class = ExactPackageArchiveFilterSet - - -class BaseDirectoryIndexViewSet(ReadOnlyModelViewSet): - lookup_field = 'fingerprint' - - @action(detail=False) - def match(self, request): - fingerprints = request.query_params.getlist('fingerprint') - if not fingerprints: - return Response() - - model_class = self.get_serializer().Meta.model - results = [] - unique_fingerprints = set(fingerprints) - for fingerprint in unique_fingerprints: - matches = model_class.match(fingerprint) - for match in matches: - _, bah128 = split_fingerprint(fingerprint) - # Get fingerprint from the match - fp = match.fingerprint() - _, match_bah128 = split_fingerprint(fp) - hd = byte_hamming_distance(bah128, match_bah128) - similarity_score = (128 - hd) / 128 - results.append( - { - 'fingerprint': fingerprint, - 'matched_fingerprint': fp, - 'package': match.package, - 'similarity_score': similarity_score, - } - ) - - serialized_match_results = BaseDirectoryIndexMatchSerializer( - results, - context={'request': request}, - many=True - ) - return Response(serialized_match_results.data) - - -class ApproximateDirectoryContentIndexViewSet(BaseDirectoryIndexViewSet): - queryset = ApproximateDirectoryContentIndex.objects.all() - serializer_class = ApproximateDirectoryContentIndexSerializer - filterset_class = ApproximateDirectoryContentFilterSet - - -class ApproximateDirectoryStructureIndexViewSet(BaseDirectoryIndexViewSet): - queryset = ApproximateDirectoryStructureIndex.objects.all() - serializer_class = ApproximateDirectoryStructureIndexSerializer - filterset_class = ApproximateDirectoryStructureFilterSet diff --git a/matchcode/tests/test_api.py b/matchcode/tests/test_api.py deleted file mode 100644 index 02d79abf..00000000 --- a/matchcode/tests/test_api.py +++ /dev/null @@ -1,168 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# purldb is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/purldb for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -import os - -from django.urls import reverse - -from packagedb.models import Package - -from matchcode.utils import index_package_directories -from matchcode.utils import load_resources_from_scan -from matchcode.utils import MatchcodeTestCase -from matchcode.tests import FIXTURES_REGEN - - -class ApproximateDirectoryStructureIndexAPITestCase(MatchcodeTestCase): - BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') - - def setUp(self): - # Execute the superclass' setUp method before creating our own - # DB objects - super().setUp() - - self.test_package1, _ = Package.objects.get_or_create( - filename='plugin-request-2.4.1.tgz', - sha1='7295749caddd3c52be472eef6623a7b441ed17d6', - size=7269, - name='plugin-request', - version='2.4.1', - download_url='https://registry.npmjs.org/@umijs/plugin-request/-/plugin-request-2.4.1.tgz', - type='npm', - ) - load_resources_from_scan(self.get_test_loc('match/nested/plugin-request-2.4.1-ip.json'), self.test_package1) - index_package_directories(self.test_package1) - - self.test_package2, _ = Package.objects.get_or_create( - filename='underscore-1.10.9.tgz', - sha1='ba7a9cfc15873e67821611503a34a7c26bf7264f', - size=26569, - name='underscore', - version='1.10.9', - download_url='https://registry.npmjs.org/@types/underscore/-/underscore-1.10.9.tgz', - type='npm', - ) - load_resources_from_scan(self.get_test_loc('match/nested/underscore-1.10.9-ip.json'), self.test_package2) - index_package_directories(self.test_package2) - - def test_api_approximate_directory_content_index_list_fingerprint_lookup(self): - test_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7df45' - response = self.client.get( - reverse('api:approximatedirectorycontentindex-list'), - data={'fingerprint': test_fingerprint} - ) - self.assertEqual(200, response.status_code) - results = response.data.get('results', []) - self.assertEqual(1, len(results)) - result = results[0] - expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package1.uuid]) - expected_result = { - 'fingerprint': '00000007af7d63765c78fa516b5353f5ffa7df45', - 'package': expected_package - } - self.assertEqual(expected_result, result) - - def test_api_approximate_directory_structure_index_list_fingerprint_lookup(self): - test_fingerprint = '00000004d10982208810240820080a6a3e852486' - response = self.client.get( - reverse('api:approximatedirectorystructureindex-list'), - data={'fingerprint': test_fingerprint} - ) - self.assertEqual(200, response.status_code) - results = response.data.get('results', []) - self.assertEqual(1, len(results)) - result = results[0] - expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package2.uuid]) - expected_result = { - 'fingerprint': '00000004d10982208810240820080a6a3e852486', - 'package': expected_package - } - self.assertEqual(expected_result, result) - - def test_api_approximate_directory_content_index_match_no_match(self): - test_fingerprint = '000000020e1d2124040134564e1941a6a620db34' - response = self.client.get( - reverse('api:approximatedirectorycontentindex-match'), - data={'fingerprint': test_fingerprint} - ) - results = response.data - self.assertEqual(0, len(results)) - - def test_api_approximate_directory_structure_index_match_no_match(self): - test_fingerprint = '00000004d10982789010240876580a6a3e852485' - response = self.client.get( - reverse('api:approximatedirectorystructureindex-match'), - data={'fingerprint': test_fingerprint} - ) - results = response.data - self.assertEqual(0, len(results)) - - def test_api_approximate_directory_content_index_match_close_match(self): - # This test fingerprint has a hamming distance of 7 from the expected fingerprint - test_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7d000' - response = self.client.get( - reverse('api:approximatedirectorycontentindex-match'), - data={'fingerprint': test_fingerprint} - ) - results = response.data - self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(test_fingerprint, result['fingerprint']) - expected_matched_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7df45' - self.assertEqual(expected_matched_fingerprint, result['matched_fingerprint']) - expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package1.uuid]) - self.assertEqual(expected_package, result['package']) - self.assertEqual(0.9453125, result['similarity_score']) - - def test_api_approximate_directory_structure_index_match_close_match(self): - # This test fingerprint has a hamming distance of 7 from the expected fingerprint - test_fingerprint = '00000004d10982208810240820080a6a3e800000' - response = self.client.get( - reverse('api:approximatedirectorystructureindex-match'), - data={'fingerprint': test_fingerprint} - ) - results = response.data - self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(test_fingerprint, result['fingerprint']) - expected_matched_fingerprint = '00000004d10982208810240820080a6a3e852486' - self.assertEqual(expected_matched_fingerprint, result['matched_fingerprint']) - expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package2.uuid]) - self.assertEqual(expected_package, result['package']) - self.assertEqual(0.9453125, result['similarity_score']) - - def test_api_approximate_directory_content_index_match(self): - test_fingerprint = '00000007af7d63765c78fa516b5353f5ffa7df45' - response = self.client.get( - reverse('api:approximatedirectorycontentindex-match'), - data={'fingerprint': test_fingerprint} - ) - results = response.data - self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(test_fingerprint, result['fingerprint']) - self.assertEqual(test_fingerprint, result['matched_fingerprint']) - expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package1.uuid]) - self.assertEqual(expected_package, result['package']) - self.assertEqual(1.0, result['similarity_score']) - - def test_api_approximate_directory_structure_index_match(self): - test_fingerprint = '00000004d10982208810240820080a6a3e852486' - response = self.client.get( - reverse('api:approximatedirectorystructureindex-match'), - data={'fingerprint': test_fingerprint} - ) - results = response.data - self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(test_fingerprint, result['fingerprint']) - self.assertEqual(test_fingerprint, result['matched_fingerprint']) - expected_package = 'http://testserver' + reverse('api:package-detail', args=[self.test_package2.uuid]) - self.assertEqual(expected_package, result['package']) - self.assertEqual(1.0, result['similarity_score']) diff --git a/packagedb/api.py b/packagedb/api.py index 164aec31..747f591d 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -14,9 +14,13 @@ from django.db.models import OuterRef from django.db.models import Q from django.db.models import Subquery +from django.forms import widgets +from django.forms.fields import MultipleChoiceField from django_filters.filters import Filter +from django_filters.filters import MultipleChoiceFilter from django_filters.filters import OrderingFilter from django_filters.rest_framework import FilterSet + from drf_spectacular.plumbing import build_array_type from drf_spectacular.plumbing import build_basic_type from drf_spectacular.types import OpenApiTypes @@ -35,8 +39,6 @@ from univers.version_range import VersionRange from univers.versions import InvalidVersion -from matchcode.api import MultipleCharFilter -from matchcode.api import MultipleCharInFilter # UnusedImport here! # But importing the mappers and visitors module triggers routes registration from minecode import priority_router @@ -74,6 +76,66 @@ logger = logging.getLogger(__name__) +class CharMultipleWidget(widgets.TextInput): + """ + Enables the support for `MultiValueDict` `?field=a&field=b` + reusing the `SelectMultiple.value_from_datadict()` but render as a `TextInput`. + """ + def value_from_datadict(self, data, files, name): + value = widgets.SelectMultiple().value_from_datadict(data, files, name) + if not value or value == ['']: + return '' + + return value + + def format_value(self, value): + """ + Return a value as it should appear when rendered in a template. + """ + return ', '.join(value) + + +class MultipleCharField(MultipleChoiceField): + """ + Overrides `MultipleChoiceField` to fit in `MultipleCharFilter`. + """ + widget = CharMultipleWidget + + def valid_value(self, value): + return True + + +class MultipleCharFilter(MultipleChoiceFilter): + """ + Filters on multiple values for a CharField type using `?field=a&field=b` URL syntax. + """ + field_class = MultipleCharField + + +class MultipleCharInFilter(MultipleCharFilter): + """ + Does a __in = [value] filter instead of field=value filter + """ + def filter(self, qs, value): + if not value: + # Even though not a noop, no point filtering if empty. + return qs + + if self.is_noop(qs, value): + return qs + + predicate = self.get_filter_predicate(value) + old_field_name = next(iter(predicate)) + new_field_name = f'{old_field_name}__in' + predicate[new_field_name] = predicate[old_field_name] + predicate.pop(old_field_name) + + q = Q(**predicate) + qs = self.get_method(qs)(q) + + return qs.distinct() if self.distinct else qs + + class CreateListRetrieveUpdateViewSetMixin( mixins.CreateModelMixin, mixins.ListModelMixin, @@ -674,7 +736,7 @@ class CollectViewSet(viewsets.ViewSet): Return Package data for the purl passed in the `purl` query parameter. If the package does not exist, we will fetch the Package data and return - it in the same request. + it in the same request. Optionally, provide the list of addon_pipelines to run on the package. Find all addon pipelines [here.](https://scancodeio.readthedocs.io/en/latest/built-in-pipelines.html) @@ -693,7 +755,7 @@ class CollectViewSet(viewsets.ViewSet): # There is no OpenApiTypes.LIST https://github.com/tfranzel/drf-spectacular/issues/341 OpenApiParameter( - 'addon_pipelines', + 'addon_pipelines', build_array_type(build_basic_type(OpenApiTypes.STR)), 'query', description='Addon pipelines', ), @@ -704,17 +766,17 @@ def list(self, request, format=None): serializer = self.serializer_class(data=request.query_params) if not serializer.is_valid(): return Response( - {'errors': serializer.errors}, + {'errors': serializer.errors}, status=status.HTTP_400_BAD_REQUEST, ) validated_data = serializer.validated_data purl = validated_data.get('purl') - + kwargs = dict() if source_purl := validated_data.get('source_purl', None): kwargs["source_purl"] = source_purl - + if addon_pipelines := validated_data.get('addon_pipelines', []): kwargs["pipelines"] = addon_pipelines @@ -756,8 +818,8 @@ def index_packages(self, request, *args, **kwargs): """ Take a list of `packages` (where each item is a dictionary containing either PURL or versionless PURL along with vers range, optionally with source package PURL) - and index it. - Also each package can have list of `addon_pipelines` to run on the package. + and index it. + Also each package can have list of `addon_pipelines` to run on the package. Find all addon pipelines [here.](https://scancodeio.readthedocs.io/en/latest/built-in-pipelines.html) @@ -1057,7 +1119,7 @@ def get_resolved_packages(packages, supported_ecosystems): resolved_packages_by_purl[res_purl] = {'purl': res_purl} else: unsupported_vers.add(vers) - + unique_resolved_packages = resolved_packages_by_purl.values() return list(unique_resolved_packages), list(unsupported_purls), list(unsupported_vers)