Source code for planetary_coverage.misc.download
"""Generic web download method."""
import certifi
import ssl
from pathlib import Path
from urllib.request import urlopen
from .logger import logger
log_download, debug_download = logger('Download', info_stdout=True)
# Fix CERTIFICATE_VERIFY_FAILED error in python 3.13 (https://stackoverflow.com/q/79123649)
CONTEXT = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
CONTEXT.load_verify_locations(cafile=certifi.where())
[docs]
def wget(url, fout, skip=False, force=False):
"""Web download.
Parameters
----------
url: str
URL to download
fout: str or pathlib.Path
Output file.
skip: bool, optional
Skip download if the output file already exists (default: `False`).
Has the priority over :py:attr:`force`.
force: bool, optional
Force download even if the file exists (default: `False`).
Returns
-------
pathlib.Path
Downloaded file path.
Raises
------
ValueError
If the URL provided is not starting with `http[s]://`.
FileExistsError
If the file already exists and `force` is `False` (default).
Note
----
The missing sub-directories will be created.
By default, logging is set at INFO level.
Use :py:func:`debug_download` function to
increase or disable the logging output.
"""
if not url.startswith(('http://', 'https://')):
raise ValueError(f'URL must start with `http[s]://` not `{url}`')
fname = Path(fout)
if fname.exists() and skip:
return fname
if not fname.exists() or force:
# Create sub directories (if missing)
fname.parent.mkdir(parents=True, exist_ok=True)
# Download the content and save it in `fname`
log_download.info(url)
with urlopen(url, context=CONTEXT) as resp: # noqa: S310 (URL audited above)
fname.write_bytes(resp.read())
log_download.debug('Saved in: %s', fname)
else: # when file exists and not forced
raise FileExistsError(fname)
return fname