Skip to content

Commit

Permalink
Merge pull request #12437 from 0xc0170/dev_travis_scancode
Browse files Browse the repository at this point in the history
travis: add PR license check for missing/not-valid license files
  • Loading branch information
0xc0170 authored Feb 24, 2020
2 parents 9205bdf + a0248c1 commit bb48fa4
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 2 deletions.
34 changes: 32 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,40 @@ matrix:
# Travis job "license check": installs scancode-toolkit, copies the source
# files changed by the pull request into the SCANCODE directory, scans them
# and evaluates the scan with tools/test/travis-ci/scancode-evaluate.py.
- <<: *basic-vm
name: "license check"
env: NAME=licence_check
language: python
python: 3.6.8 # scancode-toolkit v3.1.1 requires v3.6.8
install:
- pip install scancode-toolkit==3.1.1
before_script:
- mkdir -p SCANCODE
# Fetch remaining information needed for branch comparison
- git fetch --all --unshallow --tags
- git fetch origin "${TRAVIS_BRANCH}"
script:
# scancode does not support list of files, only one file or directory
# we use SCANCODE directory for all changed files (their copies with full tree)
# The pipeline below lists the files that differ between FETCH_HEAD and HEAD
# (deleted files filtered out), keeps names ending in c/cpp/h/hpp/py, drops
# tools/test/toolchains/api_test.py and copies the rest into SCANCODE with
# their parent directories.
# NOTE(review): the '.' in both grep patterns is not escaped, so it matches any
# character rather than only a literal dot - confirm this is intended.
- >-
git diff --name-only --diff-filter=d FETCH_HEAD..HEAD \
| ( grep '.\(c\|cpp\|h\|hpp\|py\)$' || true ) \
| ( grep -v '^tools/test/toolchains/api_test.py') \
| while read file; do cp --parents "${file}" SCANCODE; done
- scancode -l --json-pp scancode.json SCANCODE
# "|| true" makes this step succeed whatever the evaluation returns; the
# evaluation is run again in after_success below.
- python ./tools/test/travis-ci/scancode-evaluate.py -f scancode.json || true
after_success:
- python ./tools/test/travis-ci/scancode-evaluate.py -f scancode.json
- cat scancode-evaluate.log
# NOTE(review): "$?" is read right after the "cat" above, so retval presumably
# holds the exit status of cat, not of scancode-evaluate.py - confirm.
- retval=$?
# NOTE(review): the "! grep --recursive ..." lines that open the script below
# look like residue of the removed GPL text check in this diff view - confirm
# whether they belong to this step.
# NOTE(review): the failure branch passes "success" to set_status together with
# a "Needs review" message; set_status is not defined here - verify its meaning.
- |
! grep --recursive --max-count=100 --ignore-case --exclude .travis.yml \
"gnu general\|gnu lesser\|lesser general\|public license"
if [ $retval == 0 ]; then
echo "License check OK";
else
echo "License check failed, please review license issues found";
COUNT=$(cat scancode-evaluate.log | grep File: | wc -l)
STATUSM="Needs review, ${COUNT} license issues found";
set_status "success" "$STATUSM";
fi
- <<: *basic-vm
name: "include check"
Expand Down
133 changes: 133 additions & 0 deletions tools/test/travis-ci/scancode-evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""
SPDX-License-Identifier: Apache-2.0
Copyright (c) 2020 Arm Limited. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations
"""

# Assumptions for this script:
# 1. directory_name is the scanned directory.
#    Files are copied to this directory with their full tree. As a result, if we
#    find a license offender, we can report its full path (just strip
#    directory_name). We do this because scancode can only scan a directory or
#    a single file.
# 2. An SPDX identifier and a license text are a must for all code files.

import json
import argparse
import sys
import os.path
import logging
import re

# Reasons attached to every offending file in the report.
MISSING_LICENSE_TEXT = "Missing license header"
MISSING_PERMISIVE_LICENSE_TEXT = "Non-permissive license"
MISSING_SPDX_TEXT = "Missing SPDX license identifier"

# The report is written to scancode-evaluate.log in the current working
# directory; mode 'w' truncates the log left by a previous run.
logfile = os.path.join(os.getcwd(), 'scancode-evaluate.log')
log_file_handler = logging.FileHandler(logfile, mode='w')

userlog = logging.getLogger("scancode-evaluate")
userlog.addHandler(log_file_handler)
userlog.setLevel(logging.INFO)

def license_check(directory_name, file):
    """Check licenses in the scancode json file for the specified directory.

    This function does not verify that `file` exists; the caller must do so.

    Every scanned file (directories are ignored) must carry at least one
    license, all of its licenses must be in the 'Permissive' category, and an
    SPDX identifier must be present. Each violated rule is reported once per
    file, with its own reason.

    Args:
        directory_name - where scancode was run, stripped from reported paths
        file - scancode json output file (output from scancode --license --json-pp)

    Returns:
        0 if nothing found
        >0 - count of license issues found
        -1 if the json could not be decoded or lacks the expected keys
    """
    try:
        with open(file, 'r') as scancode_output:
            results = json.load(scancode_output)
    except ValueError:
        userlog.warning("JSON could not be decoded")
        return -1

    # One independent (path, reason) record per issue. Sharing a single mutable
    # dict between entries would let a later reason overwrite an earlier one.
    offenders = []
    try:
        for scanned in results['files']:
            # ignore directory, not relevant here
            if scanned['type'] == 'directory':
                continue

            licenses = scanned['licenses']
            if not licenses:
                offenders.append((scanned['path'], MISSING_LICENSE_TEXT))
                continue

            # Report the file once, however many of its licenses are
            # non-permissive.
            if any(found['category'] != 'Permissive' for found in licenses):
                offenders.append(
                    (scanned['path'], MISSING_PERMISIVE_LICENSE_TEXT))

            # SPDX shall be one of the licenses found
            if any("spdx" in found['matched_rule']['identifier']
                   for found in licenses):
                continue

            # Issue reported here https://github.com/nexB/scancode-toolkit/issues/1913
            # scancode has problems detecting SPDX when it is part of the
            # license text, so look for the tag in the file itself before
            # reporting it as missing.
            try:
                with open(os.path.abspath(scanned['path']), 'r') as spdx_file_check:
                    filetext = spdx_file_check.read()
            except UnicodeDecodeError:
                # not valid file for license check
                continue
            if not re.search("SPDX-License-Identifier:?", filetext):
                offenders.append((scanned['path'], MISSING_SPDX_TEXT))
    except KeyError:
        userlog.warning("Invalid scancode json file")
        return -1

    if offenders:
        userlog.warning("Found files with missing license details, please review and fix")
        for path, reason in offenders:
            # Strip the scan directory only when the path really starts with it.
            if path.startswith(directory_name):
                path = path[len(directory_name):]
            userlog.warning("File: %s reason: %s", path, reason)
    return len(offenders)

def parse_args(argv=None):
    """Parse the command line options of the license check.

    Args:
        argv - optional list of argument strings; when None, argparse reads
               the arguments from sys.argv

    Returns:
        argparse.Namespace with `file` (None when not given) and
        `directory_name` (defaults to "SCANCODE")
    """
    parser = argparse.ArgumentParser(
        description="License check.")
    parser.add_argument('-f', '--file',
                        help="scancode-toolkit output json file")
    parser.add_argument('-d', '--directory_name', default="SCANCODE",
                        help='Directory name where are files being checked')
    return parser.parse_args(argv)

if __name__ == "__main__":
    # Script entry point: exit status 0 only when the scancode report exists
    # and the evaluation found no license issue.
    args = parse_args()

    if not (args.file and os.path.isfile(args.file)):
        userlog.warning("Could not find the scancode json file")
        sys.exit(-1)

    count = license_check(args.directory_name, args.file)
    sys.exit(0 if count == 0 else -1)

0 comments on commit bb48fa4

Please sign in to comment.