Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unpacking to Singularity #305

Draft
wants to merge 8 commits into
base: 1.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions reprounzip-singularity/Singularity_multiple_runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Singularity unpacker for reprozip
import subprocess
import os
import sys
import re
from shutil import copyfile
import yaml

#TODO:
# 1. Logging error for every bash command
# 2. Using existing reprozip utility functions for common tasks like copying busy box
# 3. Comments
# 4. Integrating with the existing unpacker code design
# 5. Improving code structure

def extract_reprozip_file(filename):
    """Unpack a ReproZip pack, then the data archive found inside it.

    Runs ``tar -xf`` on *filename* in the current working directory and then
    on the ``DATA.tar.gz`` that the first extraction is expected to produce.

    Args:
        filename: Path of the pack file (relative to the current directory,
            or absolute).

    Raises:
        subprocess.CalledProcessError: If either ``tar`` invocation exits
            with a non-zero status.
    """
    # The arguments are passed as a list rather than built with format() and
    # then split() (as suggested in review), so that a path containing
    # whitespace reaches tar as a single argument.
    for archive in (filename, "DATA.tar.gz"):
        subprocess.check_call(["tar", "-xf", archive])
Copy link
Member Author

@remram44 remram44 Jun 21, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use --strip-components=1 to get rid of the DATA name.

But using tarfile is the way you should go eventually, so that you don't have to write DATA.tar.gz to disk, and because eventually you are writing into a new tar file instead of a disk folder.


# Command line: <pack file> <image directory>
args = sys.argv[1:]
filename, IMAGE_DIR = args

# Resolve the pack path before changing directory: prefixing "../" is only
# correct for a relative path combined with a single-level IMAGE_DIR.
filename = os.path.abspath(filename)
if not os.path.exists(IMAGE_DIR):
    os.makedirs(IMAGE_DIR)
# Every relative path used from here on is relative to IMAGE_DIR.
os.chdir(IMAGE_DIR)
extract_reprozip_file(filename)

# Locations inside the unpacked image. They are relative paths, so they are
# resolved against IMAGE_DIR, which the script has chdir()ed into.
SINGULARITY_DIR = "DATA/.singularity.d"
# Name of the env script written when the pack holds a single run.
run_env_file="90-environment.sh"
# Additional env script names; make_environment_file() creates them empty.
apps_file="95-apps.sh"
base_file="99-base.sh"
# Directory that receives the env scripts.
ENV_DIR = SINGULARITY_DIR+"/env"
# Directory in which copy_busybox() looks for (and installs) "sh".
SHELL_DIR = "DATA/bin"

def make_singularity_directories():
    """Create the directory skeleton and placeholder files under ``DATA``.

    Creates ``DATA/proc``, ``DATA/dev`` and ``DATA/sys``, then
    ``SINGULARITY_DIR`` with its ``actions``, ``libs`` and ``env``
    sub-directories, and finally the empty files ``labels.json``,
    ``runscript`` and ``startscript`` inside ``SINGULARITY_DIR``.

    Does nothing when ``DATA`` does not exist. Directories that are already
    present are left untouched, so the function can be re-run safely.
    """
    root_path = "DATA"
    if not os.path.exists(root_path):
        return

    directories = [os.path.join(root_path, name)
                   for name in ("proc", "dev", "sys")]
    # The parent must come before its children: os.mkdir is not recursive.
    directories.append(SINGULARITY_DIR)
    directories.extend(os.path.join(SINGULARITY_DIR, name)
                       for name in ("actions", "libs", "env"))
    for directory in directories:
        # The unpacked data may already ship some of these directories;
        # a bare os.mkdir would raise in that case.
        if not os.path.isdir(directory):
            os.mkdir(directory)

    for name in ("labels.json", "runscript", "startscript"):
        # Append mode creates a missing file without truncating an existing one.
        open(os.path.join(SINGULARITY_DIR, name), "a").close()

# Runs at import/script time, right after the pack has been extracted.
make_singularity_directories()

def copy_action_files():
    """Copy the bundled action files into the image's ``actions`` directory.

    Every regular file found in ``../../singularitd_files/actions/`` is
    copied, with its permission bits, into ``DATA/.singularity.d/actions/``.
    Both paths are relative to the current working directory.

    Raises:
        OSError: If the source directory cannot be listed or a copy fails
            (for example because the destination directory is missing).
    """
    # Local import: the file only imports ``copyfile`` from shutil at the top.
    import shutil

    source_dir = "../../singularitd_files/actions/"
    target_dir = "DATA/.singularity.d/actions/"
    for name in os.listdir(source_dir):
        source = os.path.join(source_dir, name)
        # Plain ``cp`` (without -r) does not copy directories either.
        if os.path.isfile(source):
            # Copying in-process keeps names with whitespace intact and
            # surfaces failures as exceptions instead of ignoring them.
            shutil.copy(source, target_dir)

# Runs at import/script time, after the actions directory has been created.
copy_action_files()

def write_env_file(env,env_file):
    """Write *env* as ``KEY='value'`` lines into an executable script.

    Args:
        env: Mapping of variable names to string values. ``None`` is treated
            as an empty mapping, producing an empty script.
        env_file: File name (not a path) of the script; it is created inside
            ``ENV_DIR``, replacing any previous content.
    """
    env_file = os.path.join(ENV_DIR, env_file)
    with open(env_file, 'w+') as f:
        for key, value in (env or {}).items():
            # A single quote cannot appear inside a single-quoted sh string:
            # close the quote, emit an escaped quote, then reopen ('\'').
            f.write(key + "='" + value.replace("'", "'\\''") + "'\n")
    # NOTE(review): the variables are assigned but not exported - confirm
    # that whatever sources this file does not need ``export``.
    # Equivalent of ``chmod +x`` without spawning a process.
    os.chmod(env_file, os.stat(env_file).st_mode | 0o111)

def make_environment_file():
    """Create the env scripts of the image from ``METADATA/config.yml``.

    First makes sure the three standard env files (``run_env_file``,
    ``apps_file``, ``base_file``) exist in ``ENV_DIR``. Then, for a pack with
    a single run, writes that run's environment to ``run_env_file``; for a
    pack with several runs, writes one ``<run id>_env.sh`` per run.

    Raises:
        ValueError: If the configuration lists no runs.
    """
    # NOTE(review): a reviewer asked whether these placeholder files really
    # need to exist; they are still created until that is settled.
    for name in (run_env_file, apps_file, base_file):
        open(os.path.join(ENV_DIR, name), 'a').close()

    # Write the environment file(s) from the pack's config.yml.
    source_config = "METADATA/config.yml"
    with open(source_config) as config_fp:
        # safe_load only builds plain data; yaml.load() without an explicit
        # Loader is deprecated and rejected outright by PyYAML >= 6.
        config = yaml.safe_load(config_fp)
    runs = (config or {}).get('runs') or []
    if not runs:
        raise ValueError("no runs found in {}".format(source_config))

    if len(runs) > 1:
        # One env file per run, named after the run id.
        for run in runs:
            write_env_file(run.get('environ') or {}, run['id'] + "_env.sh")
    else:
        write_env_file(runs[0].get('environ') or {}, run_env_file)

# Runs at import/script time; needs METADATA/config.yml in the current directory.
make_environment_file()

def make_runscript():
    """Generate the image's ``runscript`` from ``METADATA/config.yml``.

    For each run the script changes to the run's working directory and
    executes the run's binary with all of its recorded arguments
    (``argv[1:]``). When the pack holds several runs, each run first loads
    its own ``/.singularity.d/env/<run id>_env.sh``. The resulting file is
    written to ``SINGULARITY_DIR/runscript`` and made executable.
    """
    def quote(word):
        """Single-quote *word* for sh unless it only has safe characters."""
        if word and all(ch.isalnum() or ch in "@%+=:,./-_" for ch in word):
            return word
        return "'" + word.replace("'", "'\\''") + "'"

    source_config = "METADATA/config.yml"
    with open(source_config) as config_fp:
        # safe_load only builds plain data; yaml.load() without an explicit
        # Loader is deprecated and rejected outright by PyYAML >= 6.
        runs = yaml.safe_load(config_fp)['runs']

    lines = ["#!/bin/sh"]
    last = len(runs) - 1
    for index, run in enumerate(runs):
        if len(runs) > 1:
            # "." is the POSIX spelling; "source" is missing from some
            # /bin/sh implementations.
            lines.append(". /.singularity.d/env/{}_env.sh".format(run['id']))
        lines.append("cd {}".format(quote(run['workingdir'])))
        # argv[0] is replaced by the recorded binary path; every remaining
        # argument is kept (there may be none, or several).
        words = [run['binary']] + list(run['argv'][1:])
        command = " ".join(quote(word) for word in words)
        if index == last:
            # exec replaces the shell, so it may only be used for the final
            # run - otherwise the later runs would never start.
            command = "exec " + command
        lines.append(command)

    script_path = os.path.join(SINGULARITY_DIR, "runscript")
    with open(script_path, 'w') as f:
        f.write("\n".join(lines) + "\n")
    # Equivalent of ``chmod +x`` without spawning a process.
    os.chmod(script_path, os.stat(script_path).st_mode | 0o111)

# Runs at import/script time; needs METADATA/config.yml in the current directory.
make_runscript()


def copy_busybox():
    """Make sure the image has a shell at ``SHELL_DIR/sh``.

    Creates ``SHELL_DIR`` when it is missing and, if no ``sh`` entry exists
    there, copies ``../bin/sh`` (relative to the current directory) into it.

    Raises:
        OSError: If the directory cannot be created or the copy fails.
    """
    # Local import: the file only imports ``copyfile`` from shutil at the top.
    import shutil

    if not os.path.exists(SHELL_DIR):
        os.makedirs(SHELL_DIR)

    shell_path = os.path.join(SHELL_DIR, "sh")
    # lexists rather than isfile: a packed "sh" symlink counts as present
    # even when its target cannot be resolved from outside the image, and
    # copying onto such a link would write through it.
    if os.path.lexists(shell_path):
        return

    print("no sh in bin")
    # copy() keeps the permission bits, so the shell stays executable.
    shutil.copy("../bin/sh", shell_path)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should probably always copy busybox, for example to /busybox (that's what reprounzip-docker does).

It can be that the /bin/sh in the package does not work, or does not support some things we need (like exec -a which we should use eventually)



# Runs at import/script time, once the image tree is otherwise complete.
copy_busybox()

def run_singularity_image():
    """Run the unpacked ``DATA`` directory with ``singularity run``.

    The container is started with ``-C`` and with the caller's home
    directory bound to ``/something`` through ``-H``. The container's output
    goes straight to this process's stdout/stderr.

    Raises:
        KeyError: If the ``HOME`` environment variable is not set.
        subprocess.CalledProcessError: If ``singularity`` exits with a
            non-zero status.
    """
    home = os.environ['HOME']
    # check_call (as suggested in review) with an argument list: a home path
    # containing whitespace stays one argument and a failing run is reported
    # instead of being silently ignored.
    subprocess.check_call(
        ["singularity", "run", "-C", "-H", "{}:/something".format(home),
         "DATA"])

# Final step of the script: start the container built above.
run_singularity_image()
148 changes: 148 additions & 0 deletions reprounzip-singularity/Singularity_tar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import copy
import tarfile
import os
import sys
import yaml
import subprocess

# Paths and file names used while building and running the image. Relative
# paths are resolved against IMAGE_DIR, which the script chdir()s into.
# Directory holding the .singularity.d content that is added to the new tar.
SINGULARITY_DIR = "../../.singularity.d"
# Name of the env script written when the pack holds a single run.
RUN_ENV_FILE="90-environment.sh"
# Directory that receives the env scripts.
ENV_DIR = SINGULARITY_DIR+"/env"
# Overlay image created before the run and passed to --overlay.
OVERLAY_IMAGE = "repro_overlay.img"
# Image passed to "singularity run"; setup_singularity_image() writes a file
# of the same name using its own literal.
IMAGE_TAR_FILE = "new.tar.gz"



def write_env_file(env,env_file):
    """Write *env* as ``KEY='value'`` lines into an executable script.

    Args:
        env: Mapping of variable names to string values. ``None`` is treated
            as an empty mapping, producing an empty script.
        env_file: File name (not a path) of the script; it is created inside
            ``ENV_DIR``, replacing any previous content.
    """
    env_file = os.path.join(ENV_DIR, env_file)
    with open(env_file, 'w+') as f:
        for key, value in (env or {}).items():
            # A single quote cannot appear inside a single-quoted sh string:
            # close the quote, emit an escaped quote, then reopen ('\'').
            f.write(key + "='" + value.replace("'", "'\\''") + "'\n")
    # NOTE(review): the variables are assigned but not exported - confirm
    # that whatever sources this file does not need ``export``.
    # Equivalent of ``chmod +x`` without spawning a process.
    os.chmod(env_file, os.stat(env_file).st_mode | 0o111)


def make_environment_file():
    """Write the env script(s) of the image from ``METADATA/config.yml``.

    For a pack with a single run, that run's environment is written to
    ``RUN_ENV_FILE``; for a pack with several runs, one ``<run id>_env.sh``
    is written per run.

    Raises:
        ValueError: If the configuration lists no runs.
    """
    source_config = "METADATA/config.yml"
    with open(source_config) as config_fp:
        # safe_load only builds plain data; yaml.load() without an explicit
        # Loader is deprecated and rejected outright by PyYAML >= 6.
        config = yaml.safe_load(config_fp)
    runs = (config or {}).get('runs') or []
    if not runs:
        raise ValueError("no runs found in {}".format(source_config))

    if len(runs) > 1:
        # One env file per run, named after the run id.
        for run in runs:
            write_env_file(run.get('environ') or {}, run['id'] + "_env.sh")
    else:
        write_env_file(runs[0].get('environ') or {}, RUN_ENV_FILE)


def make_runscript():
    """Generate the image's ``runscript`` from ``METADATA/config.yml``.

    For each run the script changes to the run's working directory and
    executes the run's binary with all of its recorded arguments
    (``argv[1:]``). When the pack holds several runs, each run first loads
    its own ``/.singularity.d/env/<run id>_env.sh``. The resulting file is
    written to ``SINGULARITY_DIR/runscript`` and made executable.
    """
    def quote(word):
        """Single-quote *word* for sh unless it only has safe characters."""
        if word and all(ch.isalnum() or ch in "@%+=:,./-_" for ch in word):
            return word
        return "'" + word.replace("'", "'\\''") + "'"

    source_config = "METADATA/config.yml"
    with open(source_config) as config_fp:
        # safe_load only builds plain data; yaml.load() without an explicit
        # Loader is deprecated and rejected outright by PyYAML >= 6.
        runs = yaml.safe_load(config_fp)['runs']

    # An explicit interpreter line, so the script does not depend on the
    # caller falling back to a shell for a file without one.
    lines = ["#!/bin/sh"]
    for run in runs:
        if len(runs) > 1:
            # "." is the POSIX spelling; "source" is missing from some
            # /bin/sh implementations.
            lines.append(". /.singularity.d/env/{}_env.sh".format(run['id']))
        lines.append("cd {}".format(quote(run['workingdir'])))
        # argv[0] is replaced by the recorded binary path; every remaining
        # argument is kept (there may be none, or several).
        words = [run['binary']] + list(run['argv'][1:])
        lines.append(" ".join(quote(word) for word in words))

    script_path = os.path.join(SINGULARITY_DIR, "runscript")
    with open(script_path, 'w') as f:
        f.write("\n".join(lines) + "\n")
    # Equivalent of ``chmod +x`` without spawning a process.
    os.chmod(script_path, os.stat(script_path).st_mode | 0o111)


# Check if bin is present in the tar if not add bin and sh
def copy_busybox(tar):
    """Add a shell to *tar* when the archive has no ``bin/sh`` member.

    Args:
        tar: A ``tarfile.TarFile`` open for writing. Its existing member
            names decide what is added: nothing when ``bin/sh`` is present,
            only ``../../bin/sh`` when ``bin`` exists without a shell, and
            the whole ``../../bin`` directory when ``bin`` is missing too.
            The source paths are relative to the current directory.
    """
    names = set(tar.getnames())
    # Members are stored under their full archive path, so the shell has to
    # be looked up as "bin/sh"; a bare "sh" never matches.
    if "bin/sh" in names:
        return
    if "bin" not in names:
        print("bin is absent")
        tar.add("../../bin", arcname="bin")
    else:
        print("bin but no sh")
        tar.add("../../bin/sh", arcname="bin/sh")


def setup_singularity_image(filename):
    """Build ``new.tar.gz``, the image root filesystem, from a pack file.

    The members of the pack's inner ``DATA.tar.gz`` are copied into
    ``new.tar.gz`` with their leading ``DATA/`` removed. The ``proc``,
    ``dev``, ``sys`` and ``temp_home`` folders from
    ``../../missing_folders/`` are then added, the pack is extracted to the
    current directory so the env scripts and runscript can be generated
    from its metadata, a shell is added if needed, and ``SINGULARITY_DIR``
    is stored as ``.singularity.d``.

    Args:
        filename: Path of the pack (``.rpz``) file.
    """
    prefix = "DATA/"
    with tarfile.open(filename, 'r:*') as rpz:
        # Read the inner tar straight out of the pack.
        data = rpz.extractfile('DATA.tar.gz')
        try:
            with tarfile.open('DATA.tar.gz', fileobj=data) as tar, \
                    tarfile.open('new.tar.gz', 'w:gz') as new:
                for info in tar.getmembers():
                    # Skip the DATA directory entry itself and anything that
                    # does not live under it, rather than blindly slicing
                    # five characters off an unrelated name.
                    if not info.name.startswith(prefix):
                        continue
                    new_info = copy.copy(info)
                    new_info.name = info.name[len(prefix):]
                    if not new_info.name:
                        continue
                    # A hard link names its target relative to the archive
                    # root, so the target needs the same prefix removed.
                    if new_info.islnk() and \
                            new_info.linkname.startswith(prefix):
                        new_info.linkname = new_info.linkname[len(prefix):]

                    if new_info.isreg():
                        # Regular files carry content; everything else
                        # (directories, links, devices) is header-only.
                        new.addfile(new_info, tar.extractfile(info))
                    else:
                        new.addfile(new_info)

                # Add the missing folders: proc, dev, sys and temp_home.
                for folder in ('proc', 'dev', 'sys', 'temp_home'):
                    new.add("../../missing_folders/" + folder, folder)

                # NOTE(review): this extracts the whole pack (DATA.tar.gz
                # included) to disk; the helpers below only read
                # METADATA/config.yml - confirm nothing else needs the rest.
                rpz.extractall()
                make_environment_file()
                make_runscript()
                copy_busybox(new)
                new.add(SINGULARITY_DIR, arcname=".singularity.d")
        finally:
            # Closed explicitly: the extracted file object is not guaranteed
            # to be a context manager on every supported Python.
            data.close()



def create_overlay_image(OVERLAY_IMAGE):
    """Create an overlay image with ``singularity image.create``.

    Args:
        OVERLAY_IMAGE: Path of the image file to create.

    Returns:
        ``None`` on success, or a non-empty error message when
        ``singularity`` exits with a non-zero status, so that callers can
        keep testing the result with ``if not create_overlay_image(...)``.
    """
    # Argument list instead of a split() string: a path with whitespace
    # stays a single argument.
    process = subprocess.Popen(
        ["singularity", "image.create", OVERLAY_IMAGE],
        stdout=subprocess.PIPE)
    output, _ = process.communicate()
    print(output)
    # stderr is not piped, so communicate() never returns error text; the
    # exit status is the only reliable failure signal.
    if process.returncode != 0:
        return "singularity image.create exited with status {}".format(
            process.returncode)
    return None


def run_singularity_image(IMAGE_TAR_FILE):
    """Create the overlay image, then run *IMAGE_TAR_FILE* with Singularity.

    The container is started with ``--overlay OVERLAY_IMAGE``, ``-C`` and
    the caller's home directory bound to ``/temp_home`` through ``-H``. Its
    captured stdout is printed once it exits. When the overlay image cannot
    be created, the reason is written to stderr and nothing is run.

    Args:
        IMAGE_TAR_FILE: Path of the image to run.

    Raises:
        KeyError: If the ``HOME`` environment variable is not set.
    """
    home = os.environ['HOME']
    overlay_error = create_overlay_image(OVERLAY_IMAGE)
    if overlay_error:
        sys.stderr.write(
            "could not create overlay image: {}\n".format(overlay_error))
        return

    # Argument list instead of a split() string: paths with whitespace stay
    # single arguments.
    command = ["singularity", "run", "--overlay", OVERLAY_IMAGE, "-C",
               "-H", "{}:/temp_home".format(home), IMAGE_TAR_FILE]
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    output, _ = process.communicate()
    # TODO: copy the output from the overlay to some dir and destroy the
    # overlay later.
    print(output)




# Command line: <pack file> <image directory>
args = sys.argv[1:]
filename, IMAGE_DIR = args

# Resolve the pack path before changing directory: prefixing "../" is only
# correct for a relative path combined with a single-level IMAGE_DIR.
filename = os.path.abspath(filename)
if not os.path.exists(IMAGE_DIR):
    os.makedirs(IMAGE_DIR)
# Every relative path used from here on is relative to IMAGE_DIR.
os.chdir(IMAGE_DIR)
setup_singularity_image(filename)
run_singularity_image(IMAGE_TAR_FILE)


Loading