Source code for snakemake.deployment.containerize

from pathlib import Path
import hashlib
import os

from snakemake.exceptions import WorkflowError
from snakemake.logging import logger
from snakemake.sourcecache import LocalSourceFile


# Base directory inside the generated container image under which conda
# environments are created; containerize() places each environment in a
# subdirectory named after the environment's content hash.
CONDA_ENV_PATH = "/conda-envs"


def containerize(workflow, dag):
    """Print a Dockerfile that provides all conda environments of the DAG.

    The Dockerfile is written to stdout. It starts from the mambaforge base
    image, labels the image with a SHA-256 hash over the contents of all
    conda environments, copies (local files) or adds (remote files) each
    distinct environment definition into ``CONDA_ENV_PATH/<content_hash>/``,
    and finally creates all environments with mamba in a single RUN step.

    Args:
        workflow: Workflow object, passed through to ``get_conda_env``.
        dag: DAG whose ``jobs`` are inspected for conda environment specs.

    Raises:
        WorkflowError: If any conda environment definition contains a
            wildcard.
    """
    # Collect the specs once; jobs without a conda env are irrelevant here.
    conda_specs = [
        job.conda_env_spec for job in dag.jobs if job.conda_env_spec is not None
    ]

    if any(spec.contains_wildcard for spec in conda_specs):
        raise WorkflowError(
            "Containerization of conda based workflows is not allowed if any conda env definition contains a wildcard."
        )

    def relfile(env):
        # Local files are reported relative to the working directory,
        # anything else by its path or URI as is.
        if isinstance(env.file, LocalSourceFile):
            return os.path.relpath(env.file.get_path_or_uri(), os.getcwd())
        return env.file.get_path_or_uri()

    # Deduplicate env objects and sort them so the output order (and thus
    # the hash below) does not depend on job iteration order.
    envs = sorted(
        {
            spec.get_conda_env(workflow, env_dir=CONDA_ENV_PATH)
            for spec in conda_specs
        },
        key=relfile,
    )

    envhash = hashlib.sha256()
    for env in envs:
        logger.info(f"Hashing conda environment {relfile(env)}.")
        # build a hash of the environment contents
        envhash.update(env.content)

    print("FROM condaforge/mambaforge:latest")
    print('LABEL io.github.snakemake.containerized="true"')
    print(f'LABEL io.github.snakemake.conda_env_hash="{envhash.hexdigest()}"')

    generated = set()
    get_env_cmds = []
    generate_env_cmds = []
    for env in envs:
        if env.content_hash in generated:
            # another conda env with the same content was generated before
            continue

        # Paths written into the Dockerfile refer to the container's file
        # system and to the Dockerfile syntax (where a backslash is the
        # escape character), so always render them with forward slashes
        # instead of the host's native separator.
        prefix_path = Path(CONDA_ENV_PATH) / env.content_hash
        prefix = prefix_path.as_posix()
        env_target_path = (prefix_path / "environment.yaml").as_posix()
        env_source_path = relfile(env)

        get_env_cmds.append("\n# Conda environment:")
        get_env_cmds.append(f"# source: {env_source_path}")
        get_env_cmds.append(f"# prefix: {prefix}")
        # Embed the environment definition as a comment block.
        get_env_cmds.append(
            "\n".join(map("# {}".format, env.content.decode().strip().split("\n")))
        )
        get_env_cmds.append(f"RUN mkdir -p {prefix}")
        if isinstance(env.file, LocalSourceFile):
            copy_source = Path(env_source_path).as_posix()
            get_env_cmds.append(f"COPY {copy_source} {env_target_path}")
        else:
            get_env_cmds.append(f"ADD {env.file.get_path_or_uri()} {env_target_path}")

        generate_env_cmds.append(
            f"mamba env create --prefix {prefix} --file {env_target_path} &&"
        )
        generated.add(env.content_hash)

    print("\n# Step 1: Retrieve conda environments")
    for cmd in get_env_cmds:
        print(cmd)

    print("\n# Step 2: Generate conda environments")
    # Chain all environment creations and the final cleanup into one RUN
    # instruction, one command per continued line.
    print("\nRUN", " \\\n ".join(generate_env_cmds), "\\\n mamba clean --all -y")