Listing the entries in an intake catalog
(or doing other manipulations on them)
[1]:
import intake
import logging
from collections.abc import Iterable
[2]:
def warn_esm_cat(cat, child, position):
    """Log a warning that ``child`` is skipped because it looks like an intake-esm catalog.

    ``cat`` is accepted so the signature matches the other callbacks but is
    not used. ``position`` is the sequence of names leading to ``child``; its
    elements are joined with dots, so they must be strings.
    """
    message = f"skipping {'.'.join(position)}.{child}, as it seems to be an intake-esm catalog"
    logging.warning(message)
def traverse_tree(
    cat,
    subcat_callback,
    entry_callback,
    esm_cat_callback=warn_esm_cat,
    levels=0,
    position=None,
):
    """Traverse an intake tree and call a function on everything found in it.

    Parameters:
        cat: the catalog (or sub-catalog) whose children are visited.
        subcat_callback: called as ``subcat_callback(cat, child, position)``
            for every child whose value is iterable (should be sub-catalogs),
            before descending into it.
        entry_callback: called with the same arguments for every child whose
            value is not iterable (should be datasets).
        esm_cat_callback: called with the same arguments for children that
            ``detect_esm_cat`` flags as intake-esm catalogs. Defaults to a
            warning message, as loading them can consume a lot of memory and
            time. Such children are never loaded or descended into.
        levels: maximum depth to visit; ``0`` means unlimited.
        position: list of child names leading from the root to ``cat``.
            Callers normally leave this unset; it is filled in on recursion.

    Children whose lookup raises ``FileNotFoundError`` are logged at
    critical level and skipped.
    """
    # Use None as the default so no list object is shared between calls.
    if position is None:
        position = []

    # Stop once the requested depth has been reached (levels == 0: no limit).
    if levels and len(position) >= levels:
        return

    # Snapshot the child names before iterating, as in the original code.
    for child in list(cat):
        logging.debug(f"processing {child}")
        if detect_esm_cat(cat, child, position):
            esm_cat_callback(cat, child, position)
            continue
        try:
            # Look the child up once and reuse the result below instead of
            # indexing the catalog again for every check.
            entry = cat[child]
        except FileNotFoundError as missing:
            logging.critical(
                f"Error processing {'.'.join(position)}.{child}: File not found: {missing}"
            )
            continue
        if isinstance(entry, Iterable):
            subcat_callback(cat, child, position)
            traverse_tree(
                entry,
                subcat_callback,
                entry_callback,
                # Forward the caller's callback; otherwise nested levels would
                # silently fall back to the default warning.
                esm_cat_callback=esm_cat_callback,
                levels=levels,
                position=position + [child],
            )
        else:
            entry_callback(cat, child, position)
def detect_esm_cat(cat, child, position):
    """Return True if the entry ``child`` of ``cat`` appears to use an esm_datastore driver.

    The decision is made from the string form of ``cat._entries[child]._driver``
    without accessing ``cat[child]``. Any exception raised while inspecting the
    entry is logged as an error and treated as "not an esm catalog".
    """
    try:
        driver_description = str(cat._entries[child]._driver)
        return "esm_datastore" in driver_description
    except Exception as e:
        logging.error(
            f"Can't really decide the type of \n{position}{child}\nran into {e}"
        )
        return False
def print_tree(cat, levels=0):
    """Print an overview of the catalog tree, one line per child.

    Each line shows the child's name, the names of its user parameters in
    parentheses (if it has any) and, for sub-catalogs, a tree marker.
    ``levels`` is passed on to ``traverse_tree`` to limit the depth
    (``0`` means unlimited).
    """

    def printer(cat, child, position, appendix=""):
        # Start with an empty parameter string so the line can always be
        # printed, even when describing the entry fails unexpectedly.
        parameter_str = ""
        try:
            parameters = [
                p["name"] for p in cat[child].describe().get("user_parameters", [])
            ]
            if parameters:
                parameter_str = f"({', '.join(parameters)})"
        except Exception as e:
            # This particular error is expected for some entries and is
            # ignored; anything else is reported but does not stop printing.
            if str(e) != "Source was not made from a catalog entry":
                logging.warning(str(e))
        print(f"{' '*len(position)}{child} {parameter_str} {appendix}")

    def subcat_printer(*args, **kwargs):
        # Same output as for plain entries, marked as a sub-catalog.
        return printer(*args, **kwargs, appendix="🌳")

    traverse_tree(cat, subcat_printer, printer, levels=levels)
[3]:
# Open the intake catalog published at this URL.
cat = intake.open_catalog("https://data.nextgems-h2020.eu/catalog.yaml")
[4]:
# Print the catalog tree, descending at most two levels.
print_tree(cat, levels=2)
ICON 🌳
ngc4008 (time, zoom)
ngc4007 (time, zoom)
ngc4006 (time, zoom)
ngc4005 (time, zoom)
ngc3028 (time, zoom)
ngc3028_bc_land
ngc3026_WILL_BE_DELETED (time, zoom)
HAMOCC 🌳
ngc3542 🌳
erc1011 🌳
erc1017 🌳
IFS 🌳
IFS_9-FESOM_5-production 🌳
IFS_4.4-FESOM_5-cycle3 🌳
IFS_9-FESOM_5-cycle3 🌳
IFS_9-NEMO_25-cycle3 🌳
IFS_28-NEMO_25-cycle3 🌳
IFS_4.4-FESOM_5-cycle3-nofastdata 🌳
IFS_4.4-FESOM_5-cycle3-fastdata 🌳
IFS_grids 🌳
FESOM 🌳
IFS_4.4-FESOM_5-cycle3 🌳
IFS_28-FESOM_25-cycle3 🌳
IFS_9-FESOM_5-cycle3 🌳
FESOM_13_tropo_age_interpolated 🌳