# Source code for planetary_coverage.misc.download

"""Generic web download method."""

import certifi
import ssl
from pathlib import Path
from urllib.request import urlopen

from .logger import logger


# Module-level logging helpers for the download messages:
# `log_download` is the logger used by `wget` below and `debug_download`
# is the function users call to change (or disable) its verbosity.
# NOTE(review): `info_stdout=True` presumably routes INFO records to
# stdout -- confirm against `.logger.logger`.
log_download, debug_download = logger('Download', info_stdout=True)

# Fix CERTIFICATE_VERIFY_FAILED error in python 3.13 (https://stackoverflow.com/q/79123649)
# Shared client-side TLS context passed to every `urlopen` call in this module.
# `PROTOCOL_TLS_CLIENT` enables certificate validation and hostname checking
# by default; the trusted CA bundle is the one shipped with `certifi`.
CONTEXT = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
CONTEXT.load_verify_locations(cafile=certifi.where())



# [docs]
def wget(url, fout, skip=False, force=False):
    """Web download.

    The content is streamed to a temporary ``<name>.part`` file located
    next to the output file and moved in place only when the download
    is complete.

    Parameters
    ----------
    url: str
        URL to download
    fout: str or pathlib.Path
        Output file.
    skip: bool, optional
        Skip download if the output file already exists (default: `False`).
        Has the priority over :py:attr:`force`.
    force: bool, optional
        Force download even if the file exists (default: `False`).

    Returns
    -------
    pathlib.Path
        Downloaded file path.

    Raises
    ------
    ValueError
        If the URL provided is not starting with `http[s]://`.
    FileExistsError
        If the file already exists and both `skip` and `force`
        are `False` (default).

    Note
    ----
    The missing sub-directories will be created.

    If the download fails, the temporary file is removed and any
    pre-existing output file is left untouched.

    By default, logging is set at INFO level.
    Use :py:func:`debug_download` function to
    increase or disable the logging output.

    """
    # Local import: only required by this function and keeps the block
    # self-contained with respect to the file-level imports.
    import shutil

    if not url.startswith(('http://', 'https://')):
        raise ValueError(f'URL must start with `http[s]://` not `{url}`')

    fname = Path(fout)

    if fname.exists():
        if skip:  # `skip` has the priority over `force`
            return fname

        if not force:
            raise FileExistsError(fname)

    # Create sub directories (if missing)
    fname.parent.mkdir(parents=True, exist_ok=True)

    log_download.info(url)

    # Download in a sibling file (same directory, so the final rename does
    # not cross a filesystem boundary) to never expose a truncated `fname`.
    tmp = fname.with_name(fname.name + '.part')

    try:
        with urlopen(url, context=CONTEXT) as resp:  # noqa: S310 (URL audited above)
            # Copy by chunks instead of loading the whole payload in memory.
            with tmp.open('wb') as stream:
                shutil.copyfileobj(resp, stream)

        tmp.replace(fname)

    except BaseException:
        # Also covers KeyboardInterrupt: discard the partial download.
        if tmp.exists():
            tmp.unlink()
        raise

    log_download.debug('Saved in: %s', fname)

    return fname