"""Public catalog and loader for yield input packages (YIPs).
The CATALOG dict is the source of truth: keys are flat YIP names like
``eac1_aavc_2d``; values carry just enough metadata to drive downloads
and filtered discovery (``telescope``, ``coronagraph``, ``sampling``,
``md5``), plus a ``designer`` credit. Further descriptive metadata
(wavelengths, dark-zone extent, ...) lives in the FITS headers inside
each YIP, not here.
Archives are hosted as assets on a tagged GitHub release of this repo
(the tag named by ``DATA_RELEASE_TAG``, currently ``data-v2``) and fetched
via pooch over HTTPS. The release tag is separate from the code-release
lifecycle managed by release-please. To publish new YIPs: bump
``DATA_RELEASE_TAG`` to a new ``data-vN``, attach the updated zips to that
release, and refresh the md5 hashes here.
The catalog currently includes only a handful of reference YIPs, among
them those used by the yippy paper validation pipeline. Long-term YIP
hosting will be provided by ExEP, and when that catalog comes online the
discovery API here will grow back into a thin proxy over it.
Public API:
- ``fetch_yip(name=None, *, telescope=None, coronagraph=None, sampling=None,
cache_path=None) -> str``
- ``cache_dir() -> Path``
- ``list_yips(**filters) -> list[str]``
- ``yip_exists(name) -> bool``
- ``yip_info(name) -> dict``
"""
from __future__ import annotations
import difflib
import logging
import os
from pathlib import Path
from typing import Any
import pooch
from pooch import Unzip
# Obtain the package logger by name rather than through a relative import:
# scripts/build_zenodo_archives.py loads this module standalone via importlib,
# so there is no parent package to import from.
logger = logging.getLogger("yippy")
# Quiet pooch's INFO-level chatter so YIP download events come through the
# yippy logger in the expected format. Warnings and errors still surface.
logging.getLogger("pooch").setLevel(logging.WARNING)
# Name of the environment variable users can export to pin the YIP cache to a
# custom directory. Resolution priority for a cache location:
#   1. ``cache_path`` keyword passed to fetch_yip
#   2. ``YIPPY_CACHE_DIR`` environment variable
#   3. pooch.os_cache("yippy") -- platform default via platformdirs
CACHE_DIR_ENV_VAR = "YIPPY_CACHE_DIR"
# ---------------------------------------------------------------------------
# Release tag carrying the YIP zip assets on this repo's GitHub releases.
# Bump to ``data-vN`` when the underlying YIP files change.
# ---------------------------------------------------------------------------
DATA_RELEASE_TAG: str = "data-v2"
# Download base URL for the release assets. It is derived from
# DATA_RELEASE_TAG, so bumping the tag retargets every download. The trailing
# slash is part of the value.
_DATA_BASE_URL: str = (
    f"https://github.com/CoreySpohn/yippy/releases/download/{DATA_RELEASE_TAG}/"
)
# ---------------------------------------------------------------------------
# Catalog
#
# Currently a handful of reference YIPs from the Coronagraph Design Survey.
# Long-term YIP hosting will be provided by ExEP.
#
# Each key is a flat YIP name. Fields appearing in the entries below:
#   telescope   -- telescope label; a filterable field. Omitted when a design
#                  has no fixed telescope pairing.
#   coronagraph -- coronagraph label; a filterable field.
#   designer    -- designer name(s); informational, not a filterable field.
#   sampling    -- "1d" or "2d"; a filterable field. May be left out when the
#                  key ends in ``_1d`` / ``_2d``, in which case it is filled
#                  in from the key suffix right after this literal.
#   md5         -- archive checksum, written as ``md5:<hex digest>``.
# ---------------------------------------------------------------------------
CATALOG: dict[str, dict[str, Any]] = {
    "eac1_aavc_2d": {
        "telescope": "eac1",
        "coronagraph": "aavc",
        "designer": "Susan Redmond",
        "md5": "md5:1f4892faff18e55cbec9781a055bea4d",
    },
    "eac1_optimal_order_6_1d": {
        "telescope": "eac1",
        "coronagraph": "optimal_order_6",
        "designer": "Rus Belikov",
        "md5": "md5:df52540008a0e85467720ec91c3a84b8",
    },
    "usort_offaxis_ovc": {
        # No telescope-architecture label; an off-axis vortex coronagraph
        # design study with no fixed EAC pairing.
        "coronagraph": "offaxis_ovc",
        "designer": "Susan Redmond, Emiel Por",
        # The key has no ``_1d`` / ``_2d`` suffix, so sampling is explicit.
        "sampling": "1d",
        "md5": "md5:f288b20f329412917d0a393a4d135439",
    },
}
# Fill in ``sampling`` on every catalog entry that does not declare it.
# Entries named ``{telescope}_{coronagraph}_(1d|2d)`` get the value from the
# last two characters of their key; an entry that neither sets ``sampling``
# nor carries one of those suffixes is a catalog authoring error and aborts
# the import.
for _name, _meta in CATALOG.items():
    if "sampling" in _meta:
        # An explicit value always wins over the naming convention.
        continue
    if not _name.endswith(("_1d", "_2d")):
        raise RuntimeError(
            f"Catalog entry {_name!r} must either set 'sampling' explicitly "
            "or end with `_1d` / `_2d`."
        )
    # The suffix is ``_1d`` or ``_2d``, so the final two characters are the
    # sampling label itself.
    _meta["sampling"] = _name[-2:]
# Keep the loop variables out of the module namespace.
del _name, _meta
[docs]
def _make_pikachu(cache_dir_path: str | Path) -> pooch.Pooch:
    """Create the pooch downloader for the YIP catalog.

    Args:
        cache_dir_path: Directory handed to pooch as its storage ``path``.

    Returns:
        A pooch instance rooted at ``cache_dir_path`` whose base URL is
        ``_DATA_BASE_URL`` and whose registry maps ``{name}.zip`` to the
        ``md5`` value of each ``CATALOG`` entry.
    """
    # One registry row per catalog entry: archive file name -> checksum.
    registry: dict[str, str] = {}
    for name, meta in CATALOG.items():
        registry[f"{name}.zip"] = meta["md5"]
    return pooch.create(
        path=cache_dir_path,
        base_url=_DATA_BASE_URL,
        registry=registry,
    )
# Default pooch instance for yippy's YIP cache (platform-default location).
# Named after Corey's dog. The env-var and per-call overrides build their own
# pooch on demand; this is the fast path for zero-config users.
# Built once at import time; cache_dir() also reads its ``path`` as the
# fallback cache location when no environment override is set.
_PIKACHU = _make_pikachu(pooch.os_cache("yippy"))
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
[docs]
def cache_dir() -> Path:
    """Return yippy's default cache directory for YIP archives.

    The location is chosen as follows:

    1. If the ``YIPPY_CACHE_DIR`` environment variable is set to a non-empty
       value, that path (with ``~`` expanded) is used.
    2. Otherwise the path of the module's default pooch instance is used,
       which was created from ``pooch.os_cache("yippy")`` -- the
       OS-conventional cache directory (e.g. ``~/Library/Caches/yippy`` on
       macOS, ``~/.cache/yippy`` on Linux).

    A per-call override is available through the ``cache_path`` argument of
    :func:`fetch_yip`; it does not affect the value returned here.
    """
    override = os.environ.get(CACHE_DIR_ENV_VAR)
    # An unset variable and an empty string both fall through to the default.
    return Path(override).expanduser() if override else Path(_PIKACHU.path)
[docs]
def fetch_yip(
    name: str | None = None,
    *,
    telescope: str | None = None,
    coronagraph: str | None = None,
    sampling: str | None = None,
    cache_path: str | Path | None = None,
) -> str:
    """Fetch one YIP (downloading and unzipping if needed) and return its directory.

    Select the YIP in exactly one of two ways:

    * by flat ``name``, e.g. ``"eac1_aavc_2d"``; or
    * by keyword filters, e.g. ``telescope="eac1", coronagraph="aavc",
      sampling="2d"``. The filters must narrow the catalog to a single
      entry, so include ``sampling`` when a ``(telescope, coronagraph)``
      pair exists in both 1D and 2D form.

    The archive ``{name}.zip`` is fetched through pooch with the ``Unzip``
    processor. The cache location is chosen in this order: ``cache_path``
    (this call only -- handy for shared institutional or project-scoped
    caches), then :func:`cache_dir` when the ``YIPPY_CACHE_DIR`` environment
    variable is set, then the module's default pooch instance.

    Args:
        name: Flat catalog name of the YIP.
        telescope: Filter on the entry's ``telescope`` field.
        coronagraph: Filter on the entry's ``coronagraph`` field.
        sampling: Filter on the entry's ``sampling`` field.
        cache_path: Cache directory to use for this call only.

    Returns:
        The resolved YIP directory, as a string.

    Raises:
        TypeError: if both ``name`` and filters are passed (or neither).
        KeyError: if ``name`` is not in the catalog.
        ValueError: if the structured query has zero or multiple matches.
    """
    # Collect only the filters the caller actually supplied, in a fixed order.
    requested = (
        ("telescope", telescope),
        ("coronagraph", coronagraph),
        ("sampling", sampling),
    )
    filters: dict[str, str] = {
        field: value for field, value in requested if value is not None
    }

    if name is None:
        # Structured lookup: the filters must pin down exactly one entry.
        if not filters:
            raise TypeError("Pass either `name` or filter kwargs.")
        matches = list_yips(**filters)
        if not matches:
            raise ValueError(
                f"No YIP matches {filters!r}. Try yippy.list_yips() to see options."
            )
        if len(matches) > 1:
            raise ValueError(
                f"Filters {filters!r} matched multiple YIPs: {matches}. "
                "Pass `name=` directly or narrow filters."
            )
        resolved = matches[0]
    else:
        # Direct lookup: mixing a name with filters is rejected outright.
        if filters:
            raise TypeError("Pass either `name` or filter kwargs, not both.")
        if name not in CATALOG:
            # Offer near-miss catalog names to help with typos.
            suggestions = difflib.get_close_matches(name, CATALOG.keys(), n=5)
            hint = f" Did you mean: {', '.join(suggestions)}?" if suggestions else ""
            raise KeyError(f"Unknown YIP {name!r}.{hint}")
        resolved = name

    # Start from the shared default instance; build a dedicated pooch only
    # when a per-call path or the environment variable asks for one.
    pikachu = _PIKACHU
    if cache_path is not None:
        pikachu = _make_pikachu(cache_path)
    elif os.environ.get(CACHE_DIR_ENV_VAR):
        pikachu = _make_pikachu(cache_dir())

    logger.info(f"Fetching YIP {resolved!r} (cache: {pikachu.path})")
    archive_name = f"{resolved}.zip"
    paths = pikachu.fetch(archive_name, processor=Unzip())

    # Unzip hands back a list of extracted paths. Take the first one and
    # climb its ancestors looking for the directory named after the YIP.
    sample_path = Path(paths[0])
    start = sample_path.parent
    # Used when no ancestor matches: the immediate parent of the sample file.
    yip_dir = start
    for ancestor in (start, *start.parents):
        if ancestor.parent == ancestor:
            # Reached the top of the path without a match; keep the fallback.
            break
        if ancestor.name == resolved:
            yip_dir = ancestor
            break
        if ancestor.name == archive_name:
            # Hit a directory named like the archive first: use the path
            # named after the YIP that sits next to it.
            yip_dir = ancestor.parent / resolved
            break

    logger.info(f"YIP {resolved!r} available at {yip_dir}")
    return str(yip_dir)
# Catalog fields accepted as keyword filters by list_yips; any other keyword
# is rejected there with a TypeError.
_FILTERABLE_FIELDS = frozenset({"telescope", "coronagraph", "sampling"})
[docs]
def list_yips(**filters: str) -> list[str]:
    """List the catalog names whose metadata satisfies every given filter.

    Each keyword must be one of the filterable catalog fields
    (``telescope``, ``coronagraph``, ``sampling``). An entry matches when,
    for every filter, its metadata holds exactly that value; an entry that
    lacks the field does not match. With no filters, every catalog name is
    returned. Names come back in catalog order.

    Raises:
        TypeError: if a filter key is not a valid catalog field.
    """
    unknown = {key for key in filters if key not in _FILTERABLE_FIELDS}
    if unknown:
        raise TypeError(
            f"unknown filter keys: {sorted(unknown)}. "
            f"Valid keys are {sorted(_FILTERABLE_FIELDS)}."
        )
    return [
        name
        for name, meta in CATALOG.items()
        if all(meta.get(k) == v for k, v in filters.items())
    ]
[docs]
def yip_exists(name: str) -> bool:
    """Report whether ``name`` is a key of the YIP catalog.

    This is a pure catalog lookup; nothing is downloaded and the cache is
    not consulted.
    """
    return name in CATALOG
[docs]
def yip_info(name: str) -> dict[str, Any]:
    """Look up the catalog metadata for ``name``.

    The returned dict is the catalog's own entry, not a copy, so mutating
    it mutates the catalog.

    Raises:
        KeyError: if ``name`` is not in the catalog.
    """
    try:
        return CATALOG[name]
    except KeyError:
        # Re-raise with just the name, without chaining the lookup failure.
        raise KeyError(name) from None