Source code for spack.cmd.blame

# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import argparse
import os
import pathlib
import re
import sys
from typing import Optional, Union

import spack.config
import spack.llnl.util.tty as tty
import spack.repo
import spack.util.git
import spack.util.spack_json as sjson
from spack.cmd import spack_is_git_repo
from spack.llnl.util.filesystem import working_dir
from spack.llnl.util.lang import pretty_date
from spack.llnl.util.tty.colify import colify_table
from spack.util.executable import ProcessError

# Command metadata: a one-line summary of the command plus the section and
# level labels it is filed under.
# NOTE(review): presumably read by spack's command framework -- confirm.
description = "show contributors to packages"
section = "query"
level = "long"

# Module-wide handle that the functions below call to run git subcommands.
# NOTE(review): ``required=True`` presumably makes the lookup fail when no git
# executable is available -- confirm against spack.util.git.
git = spack.util.git.git(required=True)



[docs]
def setup_parser(subparser: argparse.ArgumentParser) -> None:
    """Register the ``spack blame`` command-line options on ``subparser``.

    Three mutually exclusive flags select the view; all of them store a
    constant in ``view`` (default ``"time"``). ``--json`` is a boolean switch,
    and a single positional argument names the package or file to blame.

    Args:
        subparser: the argument parser to add the options to
    """
    # Every view flag writes its constant into the same destination.
    view_common = {"dest": "view", "action": "store_const"}

    views = subparser.add_mutually_exclusive_group()
    views.add_argument(
        "-t",
        "--time",
        const="time",
        default="time",
        help="sort by last modification date (default)",
        **view_common,
    )
    views.add_argument(
        "-p", "--percent", const="percent", help="sort by percent of code", **view_common
    )
    views.add_argument(
        "-g",
        "--git",
        const="git",
        help="show git blame output instead of summary",
        **view_common,
    )

    subparser.add_argument(
        "--json",
        default=False,
        action="store_true",
        help="output blame as machine-readable json records",
    )
    subparser.add_argument(
        "package_or_file",
        help="name of package to show contributions for, or path to a file in the spack repo",
    )




[docs]
def print_table(rows, last_mod, total_lines, emails):
    """
    Given a set of rows with authors and lines, print a table.

    Args:
        rows: iterable of ``(author, nlines)`` pairs, already in display order
        last_mod: maps each author to the timestamp handed to ``pretty_date``
        total_lines: total number of counted lines (denominator of the ``%`` column)
        emails: maps each author to the email shown in the last column

    The table ends with a blank separator row and a totals row. When
    ``last_mod`` is empty (no blame records at all, e.g. an empty file) the
    totals row's date cell is left blank instead of raising ``ValueError``.
    """
    header = ["LAST_COMMIT", "LINES", "%", "AUTHOR", "EMAIL"]
    width = len(header)

    table = [header]
    for author, nlines in rows:
        table.append(
            [
                pretty_date(last_mod[author]),
                nlines,
                round(nlines / float(total_lines) * 100, 1),
                author,
                emails[author],
            ]
        )

    # Blank separator between the per-author rows and the totals.
    table.append([""] * width)

    # max() of an empty sequence raises, so only compute the newest date when
    # there is at least one record.
    newest = pretty_date(max(last_mod.values())) if last_mod else ""

    # Pad the totals row to exactly the header's width so every row has the
    # same number of cells.
    totals = [newest, total_lines, "100.0"]
    totals += [""] * (width - len(totals))
    table.append(totals)

    colify_table(table)




[docs]
def dump_json(rows, last_mod, total_lines, emails):
    """
    Dump the blame as a json object to the terminal.

    Args:
        rows: iterable of ``(author, nlines)`` pairs, already in output order
        last_mod: maps each author to the timestamp handed to ``pretty_date``
        total_lines: total number of counted lines (denominator of ``percentage``)
        emails: maps each author to their email

    The object written to stdout has two keys: ``authors`` (one record per row)
    and ``totals``. When ``last_mod`` is empty (no blame records at all, e.g.
    an empty file) ``totals["last_commit"]`` is ``None`` instead of the call
    raising ``ValueError``.
    """
    authors = [
        {
            "last_commit": pretty_date(last_mod[author]),
            "lines": nlines,
            "percentage": round(nlines / float(total_lines) * 100, 1),
            "author": author,
            "email": emails[author],
        }
        for author, nlines in rows
    ]

    # max() of an empty sequence raises, so guard the no-records case.
    newest = pretty_date(max(last_mod.values())) if last_mod else None

    result = {
        "authors": authors,
        "totals": {"last_commit": newest, "lines": total_lines, "percentage": "100.0"},
    }

    sjson.dump(result, sys.stdout)




[docs]
def git_prefix(path: Union[str, pathlib.Path]) -> Optional[pathlib.Path]:
    """Return the top level directory if path is under a git repository.

    Terminates via ``tty.die`` when the git query fails, i.e. the path exists
    but is not inside a git repository.

    Args:
      path: path of the item presumably under a git repository

    Returns: path to the root of the git repository, or None if ``path`` does
      not exist
    """
    if not os.path.exists(path):
        return None

    if os.path.isdir(path):
        work_dir = path
    else:
        # ``os.path.dirname`` returns "" for a bare relative file name, which
        # is not a usable working directory; make the path absolute first.
        work_dir = os.path.dirname(os.path.abspath(path))

    with working_dir(work_dir):
        try:
            result = git("rev-parse", "--show-toplevel", output=str, error=str)
        except ProcessError:
            tty.die(f"'{path}' is not in a git repository.")
        else:
            # Only the first output line is the top-level directory.
            return pathlib.Path(result.split("\n")[0])




[docs]
def _path_is_under(path: pathlib.Path, root: pathlib.Path) -> bool:
    """Return True if ``path`` is ``root`` itself or located below it.

    The comparison is per path component, so ``/x/repo2/f`` is not considered
    to be under ``/x/repo`` (a plain string-prefix test would say it is).
    """
    # TODO: replace with ``path.is_relative_to(root)`` once supported
    return path == root or root in path.parents


def package_repo_root(path: Union[str, pathlib.Path]) -> Optional[pathlib.Path]:
    """Find the appropriate package repository's git root directory.

    Walks the configured repository descriptors and returns the git root of
    the first one that contains ``path``. No warning is emitted here for
    remote repositories.

    Args:
      path: path to an arbitrary file presumably in one of the spack package repos

    Returns: path to the package repository's git root directory or None
    """
    descriptors = spack.repo.RepoDescriptors.from_config(
        lock=spack.repo.package_repository_lock(), config=spack.config.CONFIG
    )
    path = pathlib.Path(path)

    for _, desc in descriptors.items():
        # Handle the remote case, whose destination is by definition the git root
        if hasattr(desc, "destination"):
            repo_dest = pathlib.Path(desc.destination)
            if (repo_dest / ".git").exists() and _path_is_under(path, repo_dest):
                return repo_dest

        # Handle the local repository case, making sure it's a spack repository.
        if hasattr(desc, "path"):
            repo_path = pathlib.Path(desc.path)
            if "spack_repo" in repo_path.parts:
                prefix = git_prefix(repo_path)
                if prefix and _path_is_under(path, prefix):
                    return prefix

    return None




[docs]
def git_supports_unshallow() -> bool:
    """Report whether the help text of ``git fetch`` mentions ``--unshallow``."""
    fetch_help = git("fetch", "--help", output=str, error=str)
    has_flag = "--unshallow" in fetch_help
    return has_flag




[docs]
def ensure_full_history(prefix: Union[str, pathlib.Path], path: str) -> None:
    """Ensure the git repository at the prefix has its full history.

    If the repository is a shallow clone, run ``git fetch --unshallow`` in it.
    Terminates via ``tty.die`` when the fetch fails or when the available git
    does not support ``--unshallow``.

    Args:
        prefix: the root directory of the git repository
        path: the package or file name under consideration (for messages)
    """
    assert os.path.isdir(prefix)

    with working_dir(prefix):
        # git marks a shallow clone with the regular *file* ``.git/shallow``
        # (not a directory), so test for a file.
        shallow_file = os.path.join(prefix, ".git", "shallow")
        if not os.path.isfile(shallow_file):
            return

        if git_supports_unshallow():
            try:
                # Capture the error output (e.g., irrelevant for full repo)
                # to ensure the output is clean.
                git("fetch", "--unshallow", error=str)
            except ProcessError as e:
                tty.die(
                    f"Cannot report blame for {path}.\n"
                    "Unable to retrieve the full git history for "
                    f'{prefix} due to "{str(e)}" error.'
                )
        else:
            tty.die(
                f"Cannot report blame for {path}.\n"
                f"Unable to retrieve the full git history for {prefix}. "
                "Use a newer 'git' that supports 'git fetch --unshallow'."
            )




[docs]
# Patterns for the ``git blame --line-porcelain`` records read by the
# histogram; compiled once rather than on every output line.
_AUTHOR_RE = re.compile(r"^author (.*)")
_AUTHOR_MAIL_RE = re.compile(r"^author-mail (.*)")
_AUTHOR_TIME_RE = re.compile(r"^author-time (.*)")
# A content line: a tab followed by anything but '#' (lines whose first
# character is '#' are treated as comments and not counted).
_COUNTED_LINE_RE = re.compile(r"^\t[^#]")


def _histogram_authors(lines):
    """Tally ``git blame --line-porcelain`` output per author.

    Args:
        lines: the porcelain output, split into lines

    Returns: tuple ``(counts, emails, last_mod, total_lines)`` where ``counts``
        maps author to number of counted lines, ``emails`` maps author to
        email, ``last_mod`` maps author to the largest ``author-time`` seen,
        and ``total_lines`` is the sum of all counts.
    """
    counts = {}
    emails = {}
    last_mod = {}
    total_lines = 0

    # ``author`` and ``email`` carry over from the header records to the
    # content line that follows them. The four patterns cannot match the same
    # line, so an if/elif chain is sufficient.
    for line in lines:
        match = _AUTHOR_RE.match(line)
        if match:
            author = match.group(1)
            continue

        match = _AUTHOR_MAIL_RE.match(line)
        if match:
            email = match.group(1)
            continue

        match = _AUTHOR_TIME_RE.match(line)
        if match:
            mod = int(match.group(1))
            last_mod[author] = max(last_mod.get(author, 0), mod)
            continue

        # ignore comments
        if _COUNTED_LINE_RE.match(line):
            counts[author] = counts.get(author, 0) + 1
            emails.setdefault(author, email)
            total_lines += 1

    return counts, emails, last_mod, total_lines


def blame(parser, args):
    """Run ``spack blame``: show who contributed to a package or file.

    Resolves ``args.package_or_file`` to a file (an existing path, otherwise a
    package name), locates the git repository containing it, and either shows
    raw ``git blame`` output (``args.view == "git"``) or prints a per-author
    summary as a table or, with ``args.json``, as JSON.

    Args:
        parser: not used by this function
        args: parsed arguments; reads ``package_or_file``, ``view`` and ``json``
    """
    # make sure this is a git repo
    if not spack_is_git_repo():
        tty.die("This spack is not a git clone. You cannot use 'spack blame'.")

    # Get the name of the path to blame and its repository prefix
    # so we can honor any .git-blame-ignore-revs that may be present.
    blame_file = None
    prefix = None
    if os.path.exists(args.package_or_file):
        blame_file = os.path.realpath(args.package_or_file)
        prefix = package_repo_root(blame_file)

    # Get path to what we assume is a package (including to a cached version
    # of a remote package repository.)
    if not blame_file:
        try:
            blame_file = spack.repo.PATH.filename_for_package_name(args.package_or_file)
        except spack.repo.UnknownNamespaceError:
            # the argument is not a package (or does not exist)
            pass

        if blame_file and os.path.isfile(blame_file):
            prefix = package_repo_root(blame_file)

    if not blame_file or not os.path.exists(blame_file):
        tty.die(f"'{args.package_or_file}' does not exist.")

    if prefix is None:
        tty.msg(f"'{args.package_or_file}' is not within a spack package repository")

    # Always work from the git root that actually contains the path. This
    # equals the package repository root when the path lives in one, and also
    # covers paths outside any configured package repository (e.g., within a
    # spack/spack clone).
    prefix = git_prefix(blame_file)

    # Make sure we can get the full/known blame even when the repository
    # is remote.
    ensure_full_history(prefix, args.package_or_file)

    # Get blame information for the path EVEN when it is located in a different
    # spack repository (e.g., spack/spack-packages) or a different git
    # repository.
    with working_dir(prefix):
        # Now we can get the blame results.
        options = ["blame"]

        # ignore the great black reformatting of 2022
        ignore_file = prefix / ".git-blame-ignore-revs"
        if ignore_file.exists():
            options.extend(["--ignore-revs-file", str(ignore_file)])

        try:
            if args.view == "git":
                # Raw view: let git write its own output and stop here.
                options.append(str(blame_file))
                git(*options)
                return
            else:
                options.extend(["--line-porcelain", str(blame_file)])
                output = git(*options, output=str, error=str)
                lines = output.split("\n")
        except ProcessError as err:
            # e.g., blame information is not tracked if the path is a directory
            tty.die(f"Blame information is not tracked for '{blame_file}':\n{err.long_message}")

    # Histogram authors
    counts, emails, last_mod, total_lines = _histogram_authors(lines)

    if args.view == "time":
        rows = sorted(counts.items(), key=lambda t: last_mod[t[0]], reverse=True)
    else:  # args.view == 'percent'
        rows = sorted(counts.items(), key=lambda t: t[1], reverse=True)

    # Dump as json
    if args.json:
        dump_json(rows, last_mod, total_lines, emails)

    # Print a nice table with authors and emails
    else:
        print_table(rows, last_mod, total_lines, emails)