Source code for snakemake.linting.snakefiles

import re
from itertools import chain

from snakemake.linting import Linter, Lint, links, NAME_PATTERN

# Regex source for a quoted absolute path: an opening single or double quote
# (group "quote"), then one or more "/segment" components whose segments
# contain neither "/" nor a newline (group "path"), then the same quote again.
ABS_PATH_PATTERN = "(?P<quote>['\"])(?P<path>(?:/[^/\\n]+?)+?)(?P=quote)"
# Regex source for a quoted path in general: an opening quote (group "quote"),
# an optional leading "/", one or more "segment/" components and an optional
# trailing segment (group "path"), then the same quote again. Unlike
# ABS_PATH_PATTERN, segments here only exclude "/", so they may span newlines.
PATH_PATTERN = "(?P<quote>['\"])(?P<path>/?(?:[^/]+?/)+?(?:[^/]+?)?)(?P=quote)"



[docs]
class SnakefileLinter(Linter):
    """Linter applying regex-based checks to the text of a snakefile.

    Every ``lint_*`` method is a generator that takes the snakefile content as
    a single string and yields one ``Lint`` per finding. The compiled regular
    expressions are passed as default arguments, so they are compiled once
    when the class body is evaluated and can be overridden by callers.
    """

    def item_desc_plain(self, snakefile):
        """Return a plain-text description of the given snakefile."""
        return f"snakefile {snakefile}"

    def item_desc_json(self, snakefile):
        """Return a dict describing the given snakefile (for JSON output)."""
        return {"snakefile": snakefile}

    def read_item(self, snakefile):
        """Return the content of ``snakefile`` read via the workflow's source cache."""
        return self.workflow.sourcecache.open(snakefile).read()

    def lint_absolute_paths(self, snakefile, regex=re.compile(ABS_PATH_PATTERN)):
        """Yield a lint for every quoted absolute path found in ``snakefile``."""
        for match in regex.finditer(snakefile):
            line = get_line(match, snakefile)
            path = match.group("path")
            yield Lint(
                title=f'Absolute path "{path}" in line {line}',
                body="Do not define absolute paths inside of the workflow, since this "
                "renders your workflow irreproducible on other machines. "
                "Use path relative to the working directory instead, or make the path "
                "configurable via a config file.",
                links=[links.config],
            )

    def lint_mixed_func_and_rules(
        self,
        snakefile,
        rule_regex=re.compile("rule .+?:"),
        func_regex=re.compile("def .+?:"),
    ):
        """Yield a single lint if ``snakefile`` contains both rules and functions.

        This is a textual heuristic: both patterns are unanchored, so matching
        text inside comments or string literals triggers the lint as well.
        """
        if rule_regex.search(snakefile) and func_regex.search(snakefile):
            yield Lint(
                title="Mixed rules and functions in same snakefile.",
                body="Small one-liner functions used only once should be "
                "defined as lambda expressions. Other functions should be collected "
                "in a common module, e.g. 'rules/common.smk'. This makes the workflow "
                "steps more readable.",
                links=[links.includes],
            )

    def lint_path_add(
        self,
        snakefile,
        regex1=re.compile(f"{NAME_PATTERN} *\\+ *{PATH_PATTERN}"),
        regex2=re.compile(f"{PATH_PATTERN} *\\+ *{NAME_PATTERN}"),
    ):
        """Yield a lint for every ``name + "path"`` or ``"path" + name`` expression.

        All matches of ``regex1`` are reported before those of ``regex2``, so
        the lints are not necessarily ordered by line number.
        """
        for match in chain(regex1.finditer(snakefile), regex2.finditer(snakefile)):
            line = get_line(match, snakefile)
            yield Lint(
                title=f"Path composition with '+' in line {line}",
                body="This becomes quickly unreadable. Usually, it is better to endure some "
                "redundancy against having a more readable workflow. Hence, just repeat common "
                'prefixes. If path composition is unavoidable, use pathlib or (python >= 3.6) string formatting with f"...". ',
            )

    def lint_envvars(
        self,
        snakefile,
        # The dot is escaped so that only a literal "os.environ" matches, and
        # the name is matched non-greedily so that several accesses on one
        # line are reported separately instead of being merged into one match
        # spanning from the first opening to the last closing quote.
        regex=re.compile(r"os\.environ\[(?P<quote>['\"])(?P<name>.+?)(?P=quote)\]"),
    ):
        """Yield a lint for every ``os.environ["NAME"]`` access to an undeclared variable.

        A variable counts as declared if its name is contained in
        ``self.workflow.envvars``.
        """
        for match in regex.finditer(snakefile):
            name = match.group("name")
            if name in self.workflow.envvars:
                continue
            line = get_line(match, snakefile)
            yield Lint(
                title=f"Environment variable {name} used but not asserted with envvars directive in line {line}.",
                body="Asserting existence of environment variables with the envvars directive ensures proper error "
                "messages if the user fails to invoke a workflow with all required environment variables defined. "
                "Further, it allows snakemake to pass them on in case of distributed execution.",
                links=[links.envvars],
            )

    def lint_singularity(self, snakefile, regex=re.compile("singularity:")):
        """Yield a lint for every occurrence of the text ``singularity:``."""
        for match in regex.finditer(snakefile):
            line = get_line(match, snakefile)
            yield Lint(
                title=f"Deprecated singularity directive used for container definition in line {line}.",
                body="Use the container directive instead (it is agnostic of the underlying container runtime).",
                links=[links.containers],
            )

    def lint_tab_usage(self, snakefile, regex=re.compile(r"^ *\t", re.MULTILINE)):
        """Yield a lint for every line whose indentation contains a tab.

        A line is reported if it starts with zero or more spaces followed by a
        tab. The pattern needs ``re.MULTILINE``: without it ``^`` only anchors
        at the very beginning of the file and later lines are never checked.
        """
        for match in regex.finditer(snakefile):
            line = get_line(match, snakefile)
            yield Lint(
                title=f"Tab usage in line {line}.",
                body="Both Python and Snakemake can get confused when mixing tabs and spaces for indentation. "
                "It is recommended to only use spaces for indentation.",
            )




[docs]
def get_line(match, snakefile):
    """Return the 1-based line number at which ``match`` starts in ``snakefile``.

    The line number is one more than the number of newline characters that
    precede the start offset of the match.
    """
    newlines_before = snakefile.count("\n", 0, match.start())
    return newlines_before + 1