Source code for spack.util.remote_file_cache
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import hashlib
import os.path
import pathlib
import shutil
import tempfile
import urllib.parse
import urllib.request
from typing import Optional
import spack.llnl.util.tty as tty
import spack.util.crypto
from spack.llnl.util.filesystem import copy, join_path, mkdirp
from spack.util.path import canonicalize_path
from spack.util.url import validate_scheme
[docs]
def raw_github_gitlab_url(url: str) -> str:
"""Transform a github URL to the raw form to avoid undesirable html.
Args:
url: url to be converted to raw form
Returns:
Raw github/gitlab url or the original url
"""
# Note we rely on GitHub to redirect the 'raw' URL returned here to the
# actual URL under https://raw.githubusercontent.com/ with '/blob'
# removed and or, '/blame' if needed.
if "github" in url or "gitlab" in url:
return url.replace("/blob/", "/raw/")
return url
[docs]
def fetch_remote_text_file(url: str, dest_dir: str) -> str:
"""Retrieve the text file from the url into the destination directory.
Arguments:
url: URL for the remote text file
dest_dir: destination directory in which to stage the file locally
Returns:
Path to the fetched file
Raises:
ValueError: if there are missing required arguments
"""
from spack.util.web import fetch_url_text # circular import
if not url:
raise ValueError("Cannot retrieve the remote file without the URL")
raw_url = raw_github_gitlab_url(url)
tty.debug(f"Fetching file from {raw_url} into {dest_dir}")
return fetch_url_text(raw_url, dest_dir=dest_dir)
[docs]
def local_path(raw_path: str, sha256: str, dest: Optional[str] = None) -> str:
"""Determine the actual path and, if remote, stage its contents locally.
Args:
raw_path: raw path with possible variables needing substitution
sha256: the expected sha256 if the file is remote
dest: destination path
Returns: resolved, normalized local path
Raises:
ValueError: missing or mismatched arguments, unsupported URL scheme
"""
if not raw_path:
raise ValueError("path argument is required to cache remote files")
file_schemes = ["", "file"]
# Allow paths (and URLs) to contain spack config/environment variables,
# etc.
path = canonicalize_path(raw_path, dest)
# Save off the Windows drive of the canonicalized path (since now absolute)
# to ensure recognized by URL parsing as a valid file "scheme".
win_path = pathlib.PureWindowsPath(path)
if win_path.drive:
file_schemes.append(win_path.drive.lower().strip(":"))
url = urllib.parse.urlparse(path)
# Path isn't remote so return normalized, absolute path with substitutions.
if url.scheme in file_schemes:
return os.path.normpath(path)
# If scheme is not valid, path is not a supported url.
if validate_scheme(url.scheme):
# Fetch files from supported URL schemes.
if url.scheme in ("http", "https", "ftp"):
if not dest:
raise ValueError("Requires the destination argument to cache remote files")
assert os.path.isabs(dest), (
f"Remote file destination '{dest}' must be an absolute path"
)
# Stage the remote configuration file
tmpdir = tempfile.mkdtemp()
try:
staged_path = fetch_remote_text_file(path, tmpdir)
# Ensure the sha256 is expected.
checksum = spack.util.crypto.checksum(hashlib.sha256, staged_path)
if sha256 and checksum != sha256:
raise ValueError(
f"Actual sha256 ('{checksum}') does not match expected ('{sha256}')"
)
# Help the user by reporting the required checksum.
if not sha256:
raise ValueError(f"Requires sha256 ('{checksum}') to cache remote files.")
# Copy the file to the destination directory
dest_dir = join_path(dest, checksum)
if not os.path.exists(dest_dir):
mkdirp(dest_dir)
cache_path = join_path(dest_dir, os.path.basename(staged_path))
copy(staged_path, cache_path)
tty.debug(f"Cached {raw_path} in {cache_path}")
# Stash the associated URL to aid with debugging
with open(join_path(dest_dir, "source_url.txt"), "w", encoding="utf-8") as f:
f.write(f"{raw_path}\n")
return cache_path
except ValueError as err:
tty.warn(f"Unable to cache {raw_path}: {str(err)}")
raise
finally:
shutil.rmtree(tmpdir)
raise ValueError(f"Unsupported URL scheme ({url.scheme}) in {raw_path}")
else:
raise ValueError(f"Invalid URL scheme ({url.scheme}) in {raw_path}")