{ "cells": [ { "cell_type": "markdown", "id": "cfefdc57-b93e-4e2d-a41e-751dd3446077", "metadata": {}, "source": [ "# Listing the entries in an intake catalog\n", "\n", "(or doing other manipulations on them)" ] }, { "cell_type": "code", "execution_count": 1, "id": "d9c6e160-2a95-400f-a08c-8912d07ba2e9", "metadata": { "tags": [] }, "outputs": [], "source": [ "import intake\n", "import logging\n", "from collections.abc import Iterable" ] }, { "cell_type": "code", "execution_count": 2, "id": "9bb19025-8b5d-4eba-91fd-f3c36546364c", "metadata": { "tags": [] }, "outputs": [], "source": [ "def warn_esm_cat(cat, child, position):\n", " logging.warning(\n", " f\"skipping {'.'.join(position)}.{child}, as it seems to be an intake-esm catalog\"\n", " )\n", "\n", "\n", "def traverse_tree(\n", " cat,\n", " subcat_callback,\n", " entry_callback,\n", " esm_cat_callback=warn_esm_cat,\n", " levels=0,\n", " position=list(),\n", "):\n", " \"\"\"Traverses an intake tree and call a function on everything it finds in it.\n", " subcat_callback is called on anything iterable (should be sub-catalogs)\n", " entry_callback is called on anything not iterable (should be datasets)\n", " esm_cat_callback will be called on intake_esm_catalogs. Defaults to a warning message, as loading them can consume a lot of memory and time.\n", " \"\"\"\n", " if levels and (levels - 1 < len(position)):\n", " return\n", " for child in list(cat):\n", " logging.debug(f\"processing {child}\")\n", " if detect_esm_cat(cat, child, position):\n", " esm_cat_callback(cat, child, position)\n", " continue\n", " try:\n", " cat[child]\n", " except FileNotFoundError as missing:\n", " logging.critical(\n", " f\"Error processing {'.'.join(position)}.{child}: File not found: {missing}\"\n", " )\n", " continue\n", "\n", " if isinstance(cat[child], Iterable):\n", " subcat_callback(cat, child, position)\n", " traverse_tree(\n", " cat[child],\n", " subcat_callback,\n", " entry_callback,\n", " levels=levels,\n", " position=position + [child],\n", " )\n", " else:\n", " entry_callback(cat, child, position)\n", "\n", "\n", "def detect_esm_cat(cat, child, position):\n", " try:\n", " if \"esm_datastore\" in str(cat._entries[child]._driver):\n", " return True\n", " except Exception as e:\n", " logging.error(\n", " f\"Can't really decide the type of \\n{position}{child}\\nran into {e}\"\n", " )\n", " return False\n", "\n", "\n", "def print_tree(cat, levels=0):\n", " def printer(cat, child, position, appendix=\"\"):\n", " try:\n", " parameters = [\n", " p[\"name\"] for p in cat[child].describe().get(\"user_parameters\", [])\n", " ]\n", " if len(parameters) > 0:\n", " parameter_str = f\"({', '.join(parameters)})\"\n", " else:\n", " parameter_str = \"\"\n", " except Exception as e:\n", " if str(e) == \"Source was not made from a catalog entry\":\n", " parameter_str = \"\"\n", " else:\n", " logging.warning(str(e))\n", " print(f\"{' '*len(position)}{child} {parameter_str} {appendix}\")\n", "\n", " def subcat_printer(*args, **kwargs):\n", " return printer(*args, **kwargs, appendix=\"🌳\")\n", "\n", " traverse_tree(cat, subcat_printer, printer, levels=levels)" ] }, { "cell_type": "code", "execution_count": 3, "id": "e3d42c91-0d68-4ff1-a1f4-8cce9bf1b055", "metadata": { "tags": [] }, "outputs": [], "source": [ "cat = intake.open_catalog(\"https://data.nextgems-h2020.eu/catalog.yaml\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "9259be85-59fc-4895-9d08-e6b23826278d", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ICON 🌳\n", " ngc4008 (time, zoom) \n", " ngc4007 (time, zoom) \n", " ngc4006 (time, zoom) \n", " ngc4005 (time, zoom) \n", " ngc3028 (time, zoom) \n", " ngc3028_bc_land \n", " ngc3026_WILL_BE_DELETED (time, zoom) \n", " HAMOCC 🌳\n", " ngc3542 🌳\n", " erc1011 🌳\n", " erc1017 🌳\n", "IFS 🌳\n", " IFS_9-FESOM_5-production 🌳\n", " IFS_4.4-FESOM_5-cycle3 🌳\n", " IFS_9-FESOM_5-cycle3 🌳\n", " IFS_9-NEMO_25-cycle3 🌳\n", " IFS_28-NEMO_25-cycle3 🌳\n", " IFS_4.4-FESOM_5-cycle3-nofastdata 🌳\n", " IFS_4.4-FESOM_5-cycle3-fastdata 🌳\n", " IFS_grids 🌳\n", "FESOM 🌳\n", " IFS_4.4-FESOM_5-cycle3 🌳\n", " IFS_28-FESOM_25-cycle3 🌳\n", " IFS_9-FESOM_5-cycle3 🌳\n", " FESOM_13_tropo_age_interpolated 🌳\n" ] } ], "source": [ "print_tree(cat, levels=2)" ] } ], "metadata": { "kernelspec": { "display_name": "0 Python 3 (based on the module python3/unstable", "language": "python", "name": "python3_unstable" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.10" } }, "nbformat": 4, "nbformat_minor": 5 }