# %% [markdown]
# # Some helper functions

# %%
import getpass
import os
import re
import sys

import pandas as pd


# %%
def fix_time_axis(data):
    """Turn icon's yyyymmdd.f time axis into actual datetime format.

    The conversion happens in place on ``data["time"]`` and only when the
    time variable is not already a datetime type and its ``units`` attribute
    is exactly ``"day as %Y%m%d.%f"``. In every other case ``data`` is left
    untouched (a missing ``units`` attribute is not an error).

    This will fail for extreme values, but should be fine for a few centuries
    around today.

    Parameters
    ----------
    data : mapping-like with a ``"time"`` entry
        The entry must expose ``dtype``, ``attrs`` and ``values``
        (e.g. an xarray Dataset).

    Returns
    -------
    None
    """
    time = data["time"]
    if pd.api.types.is_datetime64_any_dtype(time.dtype):
        return  # already converted
    if time.attrs.get("units") != "day as %Y%m%d.%f":
        return  # not the ICON yyyymmdd.f encoding; do not guess

    stamps = time.values
    # The integer part encodes the calendar day, the fractional part the
    # fraction of that day that has elapsed.
    whole_days = pd.to_datetime(["%8i" % x for x in stamps], format="%Y%m%d")
    day_fractions = pd.to_timedelta([x % 1 for x in stamps], unit="d")
    data["time"] = whole_days + day_fractions


def get_from_cat(catalog, columns):
    """A helper function for inspecting an intake catalog.

    Parameters
    ----------
    catalog : object with a ``df`` attribute holding a pandas DataFrame
    columns : str or iterable of str
        Column(s) of interest. A single name may be passed as a plain string.

    Returns
    -------
    pandas.DataFrame
        The unique combinations of the requested columns, sorted by those
        columns, with a fresh 0..n-1 index.
    """
    # Side effect kept from the original helper: makes the tables render better.
    pd.set_option("display.max_colwidth", None)

    columns = [columns] if isinstance(columns, str) else list(columns)
    return (
        catalog.df[columns]
        .drop_duplicates()
        .sort_values(columns)
        .reset_index(drop=True)
    )


def get_list_from_cat(catalog, column):
    """A helper function for getting the contents of a column in an intake catalog.

    Call with the catalog to be inspected and the column of interest.
    Returns the sorted list found under ``catalog.unique(column)[column]["values"]``.
    """
    unique_info = catalog.unique(column)
    return sorted(unique_info[column]["values"])


# %%
def make_tempdir(name, root="/scratch/"):
    """Creates a temporary directory in your /scratch/ and returns its path as string

    The directory is ``<root>/<first letter of user name>/<user name>/<name>``.
    It is created if necessary; an already existing directory is not an error.

    Parameters
    ----------
    name : str
        Name of the directory to create below the user's scratch area.
    root : str, optional
        Base of the scratch file system. Defaults to ``"/scratch/"``.
    """
    uid = getpass.getuser()
    temppath = os.path.join(root, uid[0], uid, name)
    os.makedirs(temppath, exist_ok=True)
    return temppath


# %%
def find_grids(dataset):
    """Generic ICON Grid locator

    This function checks an xarray dataset for attributes whose name contains
    "grid_file_uri", and checks if it can map them to a local path.
    It also checks the "grid_file_path" attribute.

    Candidates are tried in this order: every URI mapped to /pool/data/ICON,
    the basename of every URI (i.e. the current directory), the
    "grid_file_path" attribute, and its basename.

    Returns
    -------
    list of str
        All candidate paths that are readable (os.access(x, os.R_OK)),
        in search order. If several distinct files are found, a warning is
        written to stderr; callers are expected to use the first entry.

    Raises
    ------
    FileNotFoundError
        If none of the candidate paths is readable.
    """
    uris = [
        dataset.attrs[x] for x in dataset.attrs if "grid_file_uri" in x
    ]  # this thing might come in one of various names...
    search_paths = [
        re.sub(r"https?://icon-downloads\.mpimet\.mpg\.de", "/pool/data/ICON", x)
        for x in uris
    ] + [
        os.path.basename(x) for x in uris
    ]  # plausible mappings on mistral.
    if "grid_file_path" in dataset.attrs:
        search_paths.append(dataset.attrs["grid_file_path"])
        search_paths.append(
            os.path.basename(dataset.attrs["grid_file_path"])
        )  # also check the current dir.

    # remove things that don't exist.
    paths = [x for x in search_paths if os.access(x, os.R_OK)]
    if not paths:
        message = "Could not determine grid file!"
        if search_paths:
            message = message + "\nI looked in\n" + "\n".join(search_paths)
        if uris:
            message = message + (
                "\nPlease check %s for a possible grid file" % (" or ").join(uris)
            )
        # FileNotFoundError is an Exception subclass, so existing
        # ``except Exception`` handlers keep working.
        raise FileNotFoundError(message)
    if len(set(paths)) > 1:
        print(
            "Found multiple conflicting grid files. Using the first one.",
            file=sys.stderr,
        )
        print("Files found:", file=sys.stderr)
        print("\n".join(paths), file=sys.stderr)
    return paths


def add_grid(dataset):
    """Generic icon grid adder.

    Calls find_grids to locate a grid file, and - if it finds one - adds this
    grid file to a Dataset.

    Also tries to ensure that clon has the same dimensions as the data
    variables: if the dataset has an "ncells" dimension, the first dimension
    of the grid's ``clon`` is renamed to "ncells".

    Variables that exist in both the dataset and the grid are taken from the
    grid (they are dropped from the dataset before merging).

    Returns the merged xarray Dataset. Raises whatever find_grids raises when
    no grid file can be located.
    """
    # Imported here so the other helpers stay usable without xarray installed.
    import xarray as xr

    paths = find_grids(dataset)
    grid = xr.open_dataset(paths[0])

    # icon uses different dimension names in the output than in the grid file.
    rename = {}
    if "ncells" in dataset.dims:
        grid_ncells = grid.clon.dims[0]
        if grid_ncells != "ncells":  # nothing to rename if the names already agree
            rename = {grid_ncells: "ncells"}

    shared = [
        x
        for x in list(grid.coords) + list(grid.data_vars)
        if x in dataset.data_vars or x in dataset.coords
    ]
    return xr.merge((dataset.drop_vars(shared), grid.rename(rename)))