Skip to content

Commit

Permalink
Merge pull request #3749 from ComputeCanada/checksums_external
Browse files Browse the repository at this point in the history
add support for checksums specified in external `checksums.json` file
  • Loading branch information
boegel authored Nov 22, 2022
2 parents 1ea5d57 + 86a6e31 commit 08a56e9
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 23 deletions.
150 changes: 134 additions & 16 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import copy
import glob
import inspect
import json
import os
import re
import stat
Expand All @@ -67,7 +68,7 @@
from easybuild.tools.build_details import get_build_stats
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, dry_run_warning, dry_run_set_dirs
from easybuild.tools.build_log import print_error, print_msg, print_warning
from easybuild.tools.config import DEFAULT_ENVVAR_USERS_MODULES
from easybuild.tools.config import CHECKSUM_PRIORITY_JSON, DEFAULT_ENVVAR_USERS_MODULES
from easybuild.tools.config import FORCE_DOWNLOAD_ALL, FORCE_DOWNLOAD_PATCHES, FORCE_DOWNLOAD_SOURCES
from easybuild.tools.config import build_option, build_path, get_log_filename, get_repository, get_repositorypath
from easybuild.tools.config import install_path, log_path, package_path, source_paths
Expand Down Expand Up @@ -156,6 +157,7 @@ def __init__(self, ec):
self.patches = []
self.src = []
self.checksums = []
self.json_checksums = None

# build/install directories
self.builddir = None
Expand Down Expand Up @@ -347,23 +349,55 @@ def get_checksum_for(self, checksums, filename=None, index=None):
Obtain checksum for given filename.
:param checksums: a list or tuple of checksums (or None)
:param filename: name of the file to obtain checksum for (Deprecated)
:param filename: name of the file to obtain checksum for
:param index: index of file in list
"""
# Filename has never been used; flag it as deprecated
if filename:
self.log.deprecated("Filename argument to get_checksum_for() is deprecated", '5.0')
checksum = None

# sometimes, filename are specified as a dict
if isinstance(filename, dict):
filename = filename['filename']

# if checksums are provided as a dict, lookup by source filename as key
if isinstance(checksums, (list, tuple)):
if isinstance(checksums, dict):
if filename is not None and filename in checksums:
checksum = checksums[filename]
else:
checksum = None
elif isinstance(checksums, (list, tuple)):
if index is not None and index < len(checksums) and (index >= 0 or abs(index) <= len(checksums)):
return checksums[index]
checksum = checksums[index]
else:
return None
checksum = None
elif checksums is None:
return None
checksum = None
else:
raise EasyBuildError("Invalid type for checksums (%s), should be dict, list, tuple or None.",
type(checksums))

if checksum is None or build_option("checksum_priority") == CHECKSUM_PRIORITY_JSON:
json_checksums = self.get_checksums_from_json()
return json_checksums.get(filename, None)
else:
raise EasyBuildError("Invalid type for checksums (%s), should be list, tuple or None.", type(checksums))
return checksum

def get_checksums_from_json(self, always_read=False):
    """
    Get checksums for this software that are provided in a checksums.json file

    The parsed contents are cached in self.json_checksums, so the file is only read once
    unless always_read is set.

    :param always_read: always read the checksums.json file, even if it has been read before
    :return: dict with checksums as found in checksums.json (empty dict if the file could not be located or read)
    :raises EasyBuildError: if checksums.json is not valid JSON, or if its top-level value is not a dictionary
    """
    if always_read or self.json_checksums is None:
        try:
            path = self.obtain_file("checksums.json", no_download=True)
            self.log.info("Loading checksums from file %s", path)
            json_txt = read_file(path)
        # if the file can't be found (or read), fall back to an empty dict
        except EasyBuildError:
            self.json_checksums = {}
        else:
            # parsing is done outside of the 'try' block above on purpose:
            # a broken checksums.json file must be reported, not silently treated as a missing file
            try:
                json_checksums = json.loads(json_txt)
            except ValueError as err:
                raise EasyBuildError("Failed to parse checksums from %s: %s", path, err)

            # callers look up checksums via .get(<filename>), so only a dictionary is acceptable
            if not isinstance(json_checksums, dict):
                raise EasyBuildError("Invalid contents in %s: expected a dictionary, found %s",
                                     path, type(json_checksums))

            self.json_checksums = json_checksums

    return self.json_checksums

def fetch_source(self, source, checksum=None, extension=False, download_instructions=None):
"""
Expand Down Expand Up @@ -445,7 +479,8 @@ def fetch_sources(self, sources=None, checksums=None):
if source is None:
raise EasyBuildError("Empty source in sources list at index %d", index)

src_spec = self.fetch_source(source, self.get_checksum_for(checksums=checksums, index=index))
checksum = self.get_checksum_for(checksums=checksums, filename=source, index=index)
src_spec = self.fetch_source(source, checksum=checksum)
if src_spec:
self.src.append(src_spec)
else:
Expand Down Expand Up @@ -477,7 +512,7 @@ def fetch_patches(self, patch_specs=None, extension=False, checksums=None):
if path:
self.log.debug('File %s found for patch %s', path, patch_spec)
patch_info['path'] = path
patch_info['checksum'] = self.get_checksum_for(checksums, index=index)
patch_info['checksum'] = self.get_checksum_for(checksums, filename=patch_info['name'], index=index)

if extension:
patches.append(patch_info)
Expand Down Expand Up @@ -638,7 +673,7 @@ def collect_exts_file_info(self, fetch_files=True, verify_checksums=True):

# verify checksum (if provided)
self.log.debug('Verifying checksums for extension source...')
fn_checksum = self.get_checksum_for(checksums, index=0)
fn_checksum = self.get_checksum_for(checksums, filename=src_fn, index=0)
if verify_checksum(src_path, fn_checksum):
self.log.info('Checksum for extension source %s verified', src_fn)
elif build_option('ignore_checksums'):
Expand Down Expand Up @@ -672,7 +707,7 @@ def collect_exts_file_info(self, fetch_files=True, verify_checksums=True):
patch = patch['path']
patch_fn = os.path.basename(patch)

checksum = self.get_checksum_for(checksums[1:], index=idx)
checksum = self.get_checksum_for(checksums, filename=patch_fn, index=idx+1)
if verify_checksum(patch, checksum):
self.log.info('Checksum for extension patch %s verified', patch_fn)
elif build_option('ignore_checksums'):
Expand All @@ -694,7 +729,7 @@ def collect_exts_file_info(self, fetch_files=True, verify_checksums=True):
return exts_sources

def obtain_file(self, filename, extension=False, urls=None, download_filename=None, force_download=False,
git_config=None, download_instructions=None, alt_location=None):
git_config=None, no_download=False, download_instructions=None, alt_location=None):
"""
Locate the file with the given name
- searches in different subdirectories of source path
Expand All @@ -705,6 +740,7 @@ def obtain_file(self, filename, extension=False, urls=None, download_filename=No
:param download_filename: filename with which the file should be downloaded, and then renamed to <filename>
:param force_download: always try to download file, even if it's already available in source path
:param git_config: dictionary to define how to download a git repository
:param no_download: do not try to download the file
:param download_instructions: instructions to manually add source (used for complex cases)
:param alt_location: alternative location to use instead of self.name
"""
Expand Down Expand Up @@ -818,6 +854,13 @@ def obtain_file(self, filename, extension=False, urls=None, download_filename=No
if self.dry_run:
self.dry_run_msg(" * %s found at %s", filename, foundfile)
return foundfile
elif no_download:
if self.dry_run:
self.dry_run_msg(" * %s (MISSING)", filename)
return filename
else:
raise EasyBuildError("Couldn't find file %s anywhere, and downloading it is disabled... "
"Paths attempted (in order): %s ", filename, ', '.join(failedpaths))
elif git_config:
return get_source_tarball_from_git(filename, targetdir, git_config)
else:
Expand Down Expand Up @@ -2280,7 +2323,7 @@ def fetch_step(self, skip_checksums=False):

# fetch patches
if self.cfg['patches'] + self.cfg['postinstallpatches']:
if isinstance(self.cfg['checksums'], (list, tuple)):
if self.cfg['checksums'] and isinstance(self.cfg['checksums'], (list, tuple)):
# if checksums are provided as a list, first entries are assumed to be for sources
patches_checksums = self.cfg['checksums'][len(self.cfg['sources']):]
else:
Expand Down Expand Up @@ -2367,6 +2410,20 @@ def check_checksums_for(self, ent, sub='', source_cnt=None):
patches = ent.get('patches', [])
checksums = ent.get('checksums', [])

if not checksums:
checksums_from_json = self.get_checksums_from_json()
# recreate a list of checksums. If each filename is found, the generated list of checksums should match
# what is expected in list format
for fn in sources + patches:
# if the filename is a tuple, the actual source file name is the first element
if isinstance(fn, tuple):
fn = fn[0]
# if the filename is a dict, the actual source file name is the "filename" element
if isinstance(fn, dict):
fn = fn["filename"]
if fn in checksums_from_json.keys():
checksums += [checksums_from_json[fn]]

if source_cnt is None:
source_cnt = len(sources)
patch_cnt, checksum_cnt = len(patches), len(checksums)
Expand Down Expand Up @@ -4406,6 +4463,67 @@ class StopException(Exception):
pass


def inject_checksums_to_json(ecs, checksum_type):
    """
    Compute checksums for all files used by the given easyconfigs,
    and store them in a checksums.json file located next to each easyconfig file.

    :param ecs: list of parsed easyconfigs (each one provides the path to its easyconfig file via 'spec')
    :param checksum_type: type of checksum to use
    """
    for ec in ecs:
        spec = ec['spec']
        ec_dir, ec_fn = os.path.split(spec)
        print_msg("injecting %s checksums for %s in checksums.json" % (checksum_type, spec), log=_log)

        # fetch_step is run (without checksum verification) so all sources/patches are in place
        print_msg("fetching sources & patches for %s..." % ec_fn, log=_log)
        app = get_easyblock_instance(ec)
        app.update_config_template_run_step()
        app.fetch_step(skip_checksums=True)

        # checksums for regular sources & patches, keyed by filename
        print_msg("computing %s checksums for sources & patches for %s..." % (checksum_type, ec_fn), log=_log)
        computed = {}
        for file_info in app.src + app.patches:
            digest = compute_checksum(file_info['path'], checksum_type)
            file_name = os.path.basename(file_info['path'])
            print_msg("* %s: %s" % (file_name, digest), log=_log)
            computed[file_name] = digest

        # checksums for sources & patches of extensions (if any)
        if app.exts:
            print_msg("computing %s checksums for extensions for %s..." % (checksum_type, ec_fn), log=_log)

            for ext in app.exts:
                if 'src' in ext:
                    ext_src_name = os.path.basename(ext['src'])
                    digest = compute_checksum(ext['src'], checksum_type)
                    print_msg(" * %s: %s" % (ext_src_name, digest), log=_log)
                    computed[ext_src_name] = digest

                for patch_info in ext.get('patches', []):
                    ext_patch_name = os.path.basename(patch_info['path'])
                    digest = compute_checksum(patch_info['path'], checksum_type)
                    print_msg(" * %s: %s" % (ext_patch_name, digest), log=_log)
                    computed[ext_patch_name] = digest

        # merge freshly computed checksums into the ones already known:
        # new entries are added, identical entries are left alone,
        # and conflicting entries are only replaced when --force is used
        merged = app.get_checksums_from_json(always_read=True)
        for file_name, digest in computed.items():
            if file_name in merged:
                if not (digest != merged[file_name]):
                    continue
                if not build_option('force'):
                    raise EasyBuildError("Found existing checksum for %s, use --force to overwrite them" % file_name)
                print_warning("Found existing checksums for %s, overwriting them (due to --force)..." % ec_fn)
            merged[file_name] = digest

        # write the merged set of checksums next to the easyconfig file
        json_path = os.path.join(ec_dir, 'checksums.json')
        with open(json_path, 'w') as handle:
            json.dump(merged, handle, indent=2, sort_keys=True)


def inject_checksums(ecs, checksum_type):
"""
Inject checksums of given type in specified easyconfig files
Expand Down
11 changes: 9 additions & 2 deletions easybuild/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
:author: Toon Willems (Ghent University)
:author: Ward Poelmans (Ghent University)
:author: Fotis Georgatos (Uni.Lu, NTUA)
:author: Maxime Boissonneault (Compute Canada)
"""
import copy
import os
Expand All @@ -45,7 +46,7 @@
# expect missing log output when this not the case!
from easybuild.tools.build_log import EasyBuildError, print_error, print_msg, print_warning, stop_logging

from easybuild.framework.easyblock import build_and_install_one, inject_checksums
from easybuild.framework.easyblock import build_and_install_one, inject_checksums, inject_checksums_to_json
from easybuild.framework.easyconfig import EASYCONFIGS_PKG_SUBDIR
from easybuild.framework.easystack import parse_easystack
from easybuild.framework.easyconfig.easyconfig import clean_up_easyconfigs
Expand Down Expand Up @@ -425,7 +426,8 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
sys.exit(31) # exit -> 3x1t -> 31

# read easyconfig files
easyconfigs, generated_ecs = parse_easyconfigs(paths, validate=not options.inject_checksums)
validate = not options.inject_checksums and not options.inject_checksums_to_json
easyconfigs, generated_ecs = parse_easyconfigs(paths, validate=validate)

# handle --check-contrib & --check-style options
if run_contrib_style_checks([ec['ec'] for ec in easyconfigs], options.check_contrib, options.check_style):
Expand Down Expand Up @@ -453,6 +455,7 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):

keep_available_modules = forced or dry_run_mode or options.extended_dry_run or pr_options or options.copy_ec
keep_available_modules = keep_available_modules or options.inject_checksums or options.sanity_check_only
keep_available_modules = keep_available_modules or options.inject_checksums_to_json

# skip modules that are already installed unless forced, or unless an option is used that warrants not skipping
if not keep_available_modules:
Expand Down Expand Up @@ -538,8 +541,12 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
with rich_live_cm():
inject_checksums(ordered_ecs, options.inject_checksums)

elif options.inject_checksums_to_json:
inject_checksums_to_json(ordered_ecs, options.inject_checksums_to_json)

# cleanup and exit after dry run, searching easyconfigs or submitting regression test
stop_options = [options.check_conflicts, dry_run_mode, options.dump_env_script, options.inject_checksums]
stop_options += [options.inject_checksums_to_json]
if any(no_ec_opts) or any(stop_options):
clean_exit(logfile, eb_tmpdir, testing)

Expand Down
7 changes: 7 additions & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
:author: Ward Poelmans (Ghent University)
:author: Damian Alvarez (Forschungszentrum Juelich GmbH)
:author: Andy Georges (Ghent University)
:author: Maxime Boissonneault (Compute Canada)
"""
import copy
import glob
Expand Down Expand Up @@ -126,6 +127,11 @@
FORCE_DOWNLOAD_CHOICES = [FORCE_DOWNLOAD_ALL, FORCE_DOWNLOAD_PATCHES, FORCE_DOWNLOAD_SOURCES]
DEFAULT_FORCE_DOWNLOAD = FORCE_DOWNLOAD_SOURCES

# possible values for the 'checksum_priority' build option:
# with "json", a checksum found in the external checksums.json file is used even when another checksum is available;
# otherwise checksums.json is only consulted when no other checksum was found
# (see get_checksum_for in easybuild/framework/easyblock.py)
CHECKSUM_PRIORITY_JSON = "json"
CHECKSUM_PRIORITY_EASYCONFIG = "easyconfig"
CHECKSUM_PRIORITY_CHOICES = [CHECKSUM_PRIORITY_JSON, CHECKSUM_PRIORITY_EASYCONFIG]
# by default, checksums.json only serves as a fallback
DEFAULT_CHECKSUM_PRIORITY = CHECKSUM_PRIORITY_EASYCONFIG

# package name for generic easyblocks
GENERIC_EASYBLOCK_PKG = 'generic'

Expand Down Expand Up @@ -180,6 +186,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
'accept_eula_for',
'aggregate_regtest',
'backup_modules',
'checksum_priority',
'container_config',
'container_image_format',
'container_image_name',
Expand Down
Loading

0 comments on commit 08a56e9

Please sign in to comment.