Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added get-scan-set.py to utils scripts to return a list of non-ignored files for processing #47

Merged
merged 18 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ RUN python3 -m pip install --no-cache-dir \
# YAML (Checkov, cfn-nag)
#
RUN echo "gem: --no-document" >> /etc/gemrc && \
python3 -m pip install checkov && \
python3 -m pip install checkov pathspec && \
gem install cfn-nag

#
Expand Down Expand Up @@ -120,14 +120,17 @@ RUN mkdir -p /src && \
# Install CDK Nag stub dependencies
#
# Update NPM to latest
COPY ./utils /ash/utils/
RUN mkdir -p /ash/utils
COPY ./utils/cdk-nag-scan /ash/utils/cdk-nag-scan/
RUN npm install -g npm && \
cd /ash/utils/cdk-nag-scan && \
npm install --quiet

#
# COPY ASH source to /ash instead of / to isolate
#
COPY ./utils/cfn-to-cdk /ash/utils/cfn-to-cdk/
COPY ./utils/*.* /ash/utils/
COPY ./appsec_cfn_rules /ash/appsec_cfn_rules/
COPY ./ash-multi /ash/ash

Expand Down
10 changes: 5 additions & 5 deletions ash
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ export ASH_IMAGE_NAME=${ASH_IMAGE_NAME:-"automated-security-helper:local"}
# Set local variables
SOURCE_DIR=""
OUTPUT_DIR=""
OCI_RUNNER=""
DOCKER_EXTRA_ARGS=""
ASH_ARGS=""
NO_BUILD="NO"
Expand Down Expand Up @@ -93,7 +92,7 @@ if [[ "${RESOLVED_OCI_RUNNER}" == "" ]]; then
exit 1
# else, build and run the image
else
if [ "${DEBUG}" = "YES" ]; then
if [[ "${DEBUG}" = "YES" ]]; then
set -x
fi
echo "Resolved OCI_RUNNER to: ${RESOLVED_OCI_RUNNER}"
Expand All @@ -116,15 +115,16 @@ else
--rm \
-e ACTUAL_SOURCE_DIR=${SOURCE_DIR} \
-e ACTUAL_OUTPUT_DIR=${OUTPUT_DIR} \
--mount type=bind,source="${SOURCE_DIR}",destination=/src,readonly \
--mount type=bind,source="${OUTPUT_DIR}",destination=/out \
-e ASH_DEBUG=${DEBUG} \
--mount type=bind,source="${SOURCE_DIR}",destination=/src,readonly,bind-propagation=shared \
--mount type=bind,source="${OUTPUT_DIR}",destination=/out,bind-propagation=shared \
--tmpfs /run/scan/src:rw,noexec,nosuid ${ASH_IMAGE_NAME} \
ash \
--source-dir /src \
--output-dir /out \
$ASH_ARGS
fi
if [ "${DEBUG}" = "YES" ]; then
if [[ "${DEBUG}" = "YES" ]]; then
set +x
fi
fi
67 changes: 33 additions & 34 deletions ash-multi
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,7 @@ print_usage() {
echo -e "\t-q | --quiet Don't print verbose text about the build process."
echo -e "\t-c | --no-color Don't print colorized output."
echo -e "\t-s | --single-process Run ash scanners serially rather than as separate, parallel sub-processes."
echo -e "\t-o | --oci-runner Use the specified OCI runner instead of docker to run the containerized tools."
echo -e "\t-f | --finch Use finch instead of docker to run the containerized tools."
echo -e "\t WARNING: The '--finch|-f' option is deprecated and will be removed in a future"
echo -e "\t release. Please switch to using '--oci-runner finch' in scripts instead.\n"
echo -e "\t-o | --oci-runner Use the specified OCI runner instead of docker to run the containerized tools.\n"
echo -e "For more information please visit https://github.com/awslabs/automated-security-helper"
}

Expand All @@ -78,18 +75,19 @@ get_all_files() {
pushd . >/dev/null 2>&1
# cd to the source directory as a starting point
cd ${_ASH_SOURCE_DIR}
# Check if the source directory is a git repository and clone it to the run directory
if [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" == "true" ]]; then
echo "Source is a git repository. Using git ls-files to exclude files from scanning."
src_files=$(git ls-files)
else
echo "Source is not a git repository. Using find to list all files instead."
src_files=$(find "${_ASH_SOURCE_DIR}" \( -path '*/node_modules*' -prune -o -path '*/cdk.out*' -prune -o -path '*/.venv*' -prune -o -path '*/venv*' -prune \) -o -type f -name '*')
fi;
src_files=$(python "${_ASH_UTILS_LOCATION}/get-scan-set.py" $(pwd))
# # Check if the source directory is a git repository and clone it to the run directory
# if [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" == "true" ]]; then
# echo "Source is a git repository. Using git ls-files to exclude files from scanning."
# src_files=$($(python "${_ASH_UTILS_LOCATION}/get-scan-set.py" ${_ASH_SOURCE_DIR})
# # src_files=$(git ls-files)
# else
# echo "Source is not a git repository. Using find to list all files instead."
# src_files=$(find "${_ASH_SOURCE_DIR}" \( -path '*/node_modules*' -prune -o -path '*/cdk.out*' -prune -o -path '*/.venv*' -prune -o -path '*/venv*' -prune \) -o -type f -name '*')
# fi;
popd >/dev/null 2>&1

all_files+=( "$src_files" )

}

# shellcheck disable=SC2120
Expand Down Expand Up @@ -166,19 +164,12 @@ validate_input() {
if [[ -z ${SOURCE_DIR} ]]; then SOURCE_DIR="$(pwd)"; else SOURCE_DIR=$(cd "${SOURCE_DIR}"; pwd); fi # Transform any relative path to absolute
if [[ -z ${OUTPUT_DIR} ]]; then
OUTPUT_DIR="$(pwd)"
# Create the OUTPUT_DIR/work recursively if it doesn't already exist.
# -p flag is included will create missing parent dirs and skip if
# the dir already exists.
mkdir -p "${OUTPUT_DIR}/work"
else
# Create the OUTPUT_DIR/work recursively if it doesn't already exist.
# -p flag is included will create missing parent dirs and skip if
# the dir already exists.
mkdir -p "${OUTPUT_DIR}/work"
# The mkdir call needs to be done before absolute path resolution in case
# OUTPUT_DIR itself doesn't exist yet.
OUTPUT_DIR=$(cd "${OUTPUT_DIR}"; pwd) # Transform any relative path to absolute
fi
if [ -d "${OUTPUT_DIR}/work" ]; then
rm -rf "${OUTPUT_DIR}/work"
fi
mkdir -p "${OUTPUT_DIR}/work"
OUTPUT_DIR=$(cd "${OUTPUT_DIR}"; pwd) # Transform any relative path to absolute
CFNRULES_LOCATION=$(cd "${CFNRULES_LOCATION}"; pwd) # Transform any relative path to absolute
UTILS_LOCATION=$(cd "${UTILS_LOCATION}"; pwd) # Transform any relative path to absolute
}
Expand Down Expand Up @@ -292,7 +283,7 @@ run_security_check() {

set -e
START_TIME=$(date +%s)
VERSION=("1.2.0-e-06Mar2024")
VERSION=("1.2.3-e-15Mar2024")
OCI_RUNNER="docker"

# Overrides default OCI Runner used by ASH
Expand All @@ -303,8 +294,8 @@ GIT_EXTENSIONS=("git")
PY_EXTENSIONS=("py" "pyc" "ipynb")
INFRA_EXTENSIONS=("yaml" "yml" "tf" "json" "dockerfile")
CFN_EXTENSIONS=("yaml" "yml" "json" "template")
JS_EXTENSIONS=("js")
GRYPE_EXTENSIONS=("js" "py" "java" "go" "cs" "sh")
JS_EXTENSIONS=("js" "jsx" "ts" "tsx")
GRYPE_EXTENSIONS=("js" "jsx" "ts" "tsx" "py" "java" "go" "cs" "sh")

DOCKERFILE_LOCATION="$(dirname "${BASH_SOURCE[0]}")"/"helper_dockerfiles"
UTILS_LOCATION="$(dirname "${BASH_SOURCE[0]}")"/"utils"
Expand Down Expand Up @@ -446,12 +437,16 @@ echo -e "\n${LPURPLE}ASH version ${GREEN}$VERSION${NC}\n"

# nosemgrep
IFS=$'\n' # Support directories with spaces, make the loop iterate over newline instead of space
# Extract all zip files to temp dir *within $OUTPUT_DIR* before scanning
for zipfile in $(find "${SOURCE_DIR}" -iname "*.zip");
pushd . >/dev/null 2>&1
cd "${SOURCE_DIR}"
# for zipfile in $(find "${SOURCE_DIR}" -iname "*.zip");
for zipfile in $(python "${_ASH_UTILS_LOCATION}/get-scan-set.py" . | grep '\.zip$');
do
unzip ${QUIET_OUTPUT} -d "${OUTPUT_DIR}"/work/$(basename "${zipfile%.*}") $zipfile
tgt_dir=$(dirname "${OUTPUT_DIR}"/work/"${zipfile:2}")/"$(basename "${zipfile%.*}")"
mkdir -p "${tgt_dir}"
unzip ${QUIET_OUTPUT} -d "${tgt_dir}" $zipfile
done

popd >/dev/null 2>&1
unset IFS

declare -a all_files='' # Variable will be populated inside 'map_extensions_and_files' block
Expand Down Expand Up @@ -585,7 +580,9 @@ for pid in "${JOBS[@]}"; do
done

# Cleanup any previous file
rm -f "${OUTPUT_DIR}"/"${AGGREGATED_RESULTS_REPORT_FILENAME}"
if [[ -n "${AGGREGATED_RESULTS_REPORT_FILENAME}" && -n "${OUTPUT_DIR}" && -f "${OUTPUT_DIR}"/"${AGGREGATED_RESULTS_REPORT_FILENAME}" ]]; then
rm -f "${OUTPUT_DIR}"/"${AGGREGATED_RESULTS_REPORT_FILENAME}"
fi

# if an extension was not found, no report file will be in place, so skip the final report
if [[ $(find "${OUTPUT_DIR}/work" -iname "*_report_result.txt" | wc -l | awk '{print $1}') -gt 0 ]];
Expand All @@ -603,7 +600,9 @@ then
done

# Cleanup work directory containing all temp files
rm -rf "${OUTPUT_DIR}"/work
if [[ -n "${OUTPUT_DIR}" && -d "${OUTPUT_DIR}"/work ]]; then
rm -rf "${OUTPUT_DIR}"/work
fi

RESOLVED_OUTPUT_DIR=${ACTUAL_OUTPUT_DIR:-${OUTPUT_DIR}}
echo -e "${GREEN}\nYour final report can be found here:${NC} ${RESOLVED_OUTPUT_DIR}/${AGGREGATED_RESULTS_REPORT_FILENAME}"
Expand Down
17 changes: 13 additions & 4 deletions utils/cdk-docker-execute.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash

abs() { # compute the absolute value of the input parameter
input=$1
if [[ $input -lt 0 ]]; then
Expand Down Expand Up @@ -30,6 +31,8 @@ _ASH_UTILS_LOCATION=${_ASH_UTILS_LOCATION:-/utils}
_ASH_CFNRULES_LOCATION=${_ASH_CFNRULES_LOCATION:-/cfnrules}
_ASH_RUN_DIR=${_ASH_RUN_DIR:-/run/scan/src}

source ${_ASH_UTILS_LOCATION}/common.sh

#
# Allow the container to run Git commands against a repo in ${_ASH_SOURCE_DIR}
#
Expand All @@ -41,7 +44,8 @@ cd ${_ASH_SOURCE_DIR}
# Check if the source directory is a git repository and clone it to the run directory
if [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" == "true" ]]; then
if [[ "$_ASH_EXEC_MODE" != "local" ]]; then
git clone ${_ASH_SOURCE_DIR} ${_ASH_RUN_DIR} >/dev/null 2>&1
debug_echo "Shallow cloning git repo to ${_ASH_RUN_DIR} to remove ignored files from being scanned"
git clone --depth=1 --single-branch ${_ASH_SOURCE_DIR} ${_ASH_RUN_DIR} >/dev/null 2>&1
fi
_ASH_SOURCE_DIR=${_ASH_RUN_DIR}
cd ${_ASH_RUN_DIR}
Expand Down Expand Up @@ -79,8 +83,7 @@ cd ${_ASH_OUTPUT_DIR}
#
DIRECTORY="ash_cf2cdk_output"
# Check if this directory already exist from previous ASH run
if [ -d "${_ASH_OUTPUT_DIR}/$DIRECTORY" ]; then
# Delete this directory and its files and recreate it.
if [[ -n "${_ASH_OUTPUT_DIR}" && -d "${_ASH_OUTPUT_DIR}/$DIRECTORY" ]]; then
rm -rf "${_ASH_OUTPUT_DIR}/$DIRECTORY"
fi
mkdir -p "${_ASH_OUTPUT_DIR}/$DIRECTORY" 2> /dev/null
Expand All @@ -103,6 +106,7 @@ RC=0
# cdk --version >> ${REPORT_PATH}
# echo "----------------------" >> ${REPORT_PATH}

debug_echo "Starting all scanners within the CDK scanner tool set"
echo -e "\nstarting to investigate ..." >> ${REPORT_PATH}

cfn_files=($(readlink -f $(grep -lri 'AWSTemplateFormatVersion' ${_ASH_SOURCE_DIR} --exclude-dir={cdk.out,utils,.aws-sam,ash_cf2cdk_output} --exclude=ash) 2>/dev/null))
Expand All @@ -123,6 +127,7 @@ npm install --silent
# Now, for each file, run a cdk synth to subject the file to CDK-NAG scanning
#
if [ "${#cfn_files[@]}" -gt 0 ]; then
debug_echo "Found CloudFormation files to scan, starting scan"
echo "found ${#cfn_files[@]} files to scan. Starting scans ..." >> ${REPORT_PATH}

for file in "${cfn_files[@]}"; do
Expand All @@ -137,6 +142,7 @@ if [ "${#cfn_files[@]}" -gt 0 ]; then
# Use CDK to synthesize the CDK application,
# running CDK-NAG on the inserted CloudFormation template
#
debug_echo "Importing CloudFormation template file ${file} to apply CDK Nag rules against it"
npx cdk synth --context fileName="${file}" --quiet 2>> ${REPORT_PATH}
CRC=$?
echo "<<<<<< end cdk-nag result for ${cfn_filename} <<<<<<" >> ${REPORT_PATH}
Expand All @@ -161,9 +167,12 @@ unset IFS
#
# Clean up the CDK application temporary working folder
#
rm -rf ${CDK_WORK_DIR}
if [[ -n "${CDK_WORK_DIR}" && -d "${CDK_WORK_DIR}" ]]; then
rm -rf ${CDK_WORK_DIR}
fi

# cd back to the original folder in case path changed during scan
cd ${_CURRENT_DIR}

debug_echo "Finished all scanners within the CDK scanner tool set"
exit $RC
14 changes: 14 additions & 0 deletions utils/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Shared helpers sourced by the ASH scanner scripts.
# ASH_ROOT_DIR resolves to the parent of the directory containing this file;
# inner command substitutions are quoted so paths with spaces survive.
export ASH_ROOT_DIR="$(cd "$(dirname "$(dirname "$0")")"; pwd)"
export ASH_UTILS_DIR="${ASH_ROOT_DIR}/utils"

# LPURPLE='\033[1;35m'
# LGRAY='\033[0;37m'
# GREEN='\033[0;32m'
# RED='\033[0;31m'
# YELLOW='\033[0;33m'
# CYAN='\033[0;36m'
# NC='\033[0m' # No Color

debug_echo() {
  # Print a timestamped yellow DEBUG line when ASH_DEBUG is set to anything
  # other than "NO" (default). Uses an if-statement rather than '[[ ]] &&'
  # so the function returns 0 when debugging is disabled — otherwise any
  # caller running under 'set -e' would abort on the first debug_echo call.
  if [[ "${ASH_DEBUG:-"NO"}" != "NO" ]]; then
    echo -e "\033[0;33m[$(date '+%Y-%m-%d %H:%M:%S')] DEBUG:\033[0m ${1}"
  fi
}
101 changes: 101 additions & 0 deletions utils/get-scan-set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import sys
from typing import List
from pathspec import PathSpec
import argparse
import os
from glob import glob

# Patterns always appended after the collected ignore-file lines:
# re-include CDK asset staging output, but keep synthesized templates excluded.
ASH_INCLUSIONS=[
    "**/cdk.out/asset.*",
    "!**/*.template.json", # CDK output template default path pattern
]

def get_ash_ignorespec_lines(
    path,
    ignorefiles: List[str] = None,
) -> List[str]:
    """Collect gitwildmatch pattern lines for the ASH scan ignore spec.

    Reads every ``.ashignore``, ``.semgrepignore`` and ``.gitignore`` found
    directly at ``path`` or anywhere below it, plus any extra ``ignorefiles``
    (given relative to ``path``). Returns their stripped lines, prefixed with
    ``.git`` (the repo metadata dir is never scanned) and suffixed with
    ``ASH_INCLUSIONS``.
    """
    if ignorefiles is None:  # avoid the mutable-default-argument pitfall
        ignorefiles = []
    ignore_names = (".ashignore", ".semgrepignore", ".gitignore")
    all_ignores = set()
    for name in ignore_names:
        all_ignores.add(f"{path}/{name}")
        # recursive=True is required for '**' to match nested directories;
        # without it glob treats '**' like '*' and misses deeper ignore files.
        all_ignores.update(glob(f"{path}/**/{name}", recursive=True))
    all_ignores.update(f"{path}/{file}" for file in ignorefiles)
    lines = ['.git']
    # Sort for a deterministic pattern order (set iteration order is not).
    for ignorefile in sorted(all_ignores):
        if os.path.isfile(ignorefile):
            # print(f"Reading: {ignorefile}", file=sys.stderr)
            with open(ignorefile) as f:
                lines.extend(f.readlines())
    lines = [ line.strip() for line in lines ]
    lines.extend(ASH_INCLUSIONS)
    return lines

def get_ash_ignorespec(
    lines: List[str],
) -> PathSpec:
    """Compile gitwildmatch pattern lines into a PathSpec matcher."""
    return PathSpec.from_lines('gitwildmatch', lines)

def get_files_not_matching_spec(
    path,
    spec,
):
    """Walk ``path`` and return the sorted, de-duplicated list of file paths
    that do NOT match ``spec`` (an object exposing ``match_file(path)``,
    e.g. a pathspec.PathSpec).

    The original implementation also accumulated every visited file in an
    unused ``full`` list; that dead code is removed to avoid wasting memory
    on large source trees.
    """
    included = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            candidate = os.path.join(root, name)
            if not spec.match_file(candidate):
                # print(f"Including: {candidate}", file=sys.stderr)
                included.append(candidate)
    return sorted(set(included))

if __name__ == "__main__":
    # CLI entry point: positional scan path (defaults to the current working
    # directory) plus any number of extra ignore files layered on top of the
    # standard .gitignore/.semgrepignore/.ashignore set.
    parser = argparse.ArgumentParser(description="Get list of files not matching .gitignore underneath SourceDir arg path")
    parser.add_argument("path", help="path to scan", default=os.getcwd(), type=str, nargs='?')
    parser.add_argument("--ignorefile", help="ignore file to use in addition to the standard gitignore", default=[], type=str, nargs='*')
    args = parser.parse_args()

    # Build the combined ignore spec, then emit every non-ignored file path,
    # one per line, on stdout for consumption by the shell callers.
    ignore_lines = get_ash_ignorespec_lines(args.path, args.ignorefile)
    matcher = get_ash_ignorespec(ignore_lines)
    for included_file in get_files_not_matching_spec(args.path, matcher):
        # print(f"Returning: {included_file}", file=sys.stderr)
        print(included_file, file=sys.stdout)
Loading