Map Plotting with Intake and datashader#

Now with the data, we can think about how we want to visualize it. We build on the setup from the previous Tutorial. see https://easy.gems.dkrz.de/Processing/Intake/catalog-basics.html for the contents of the first box, with the addition of loading / installing the gribscran tool provided by Lukas Kluft and friends.

Note that 2m air temperature has different names depending no the model (‘tas’ for ICON, ‘2t’ for IFS)

import intake
import pandas as pd

# for ifs-fesom
try:
    import gribscan
except:
    %pip install gribscan
    import gribscan


pd.set_option("max_colwidth", None)  # makes the tables render better

catalog_file = "/work/ka1081/Catalogs/dyamond-nextgems.json"
cat = intake.open_esm_datastore(catalog_file)
hits = cat.search(
    variable_id=["2t", "tas"],
    frequency=["30minute", "1hour"],
    experiment_id="nextgems_cycle2",
)  # ONLY NEXTGEMS CYCLE 2
dataset_dict = hits.to_dataset_dict(cdf_kwargs={"chunks": {"time": 1}})

--> The keys in the returned dictionary of datasets are constructed as follows:
	'project.institution_id.source_id.experiment_id.simulation_id.realm.frequency.time_reduction.grid_label.level_type'

/sw/spack-levante/mambaforge-4.11.0-0-Linux-x86_64-sobz6z/lib/python3.9/site-packages/intake_esm/merge_util.py:270: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:
1. Consolidating metadata in this existing store with zarr.consolidate_metadata().
2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or
3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.
  ds = xr.open_zarr(path, **zarr_kwargs)

100.00% [5/5 02:48<00:00]

/sw/spack-levante/mambaforge-4.11.0-0-Linux-x86_64-sobz6z/lib/python3.9/site-packages/intake_esm/merge_util.py:270: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:
1. Consolidating metadata in this existing store with zarr.consolidate_metadata().
2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or
3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.
  ds = xr.open_zarr(path, **zarr_kwargs)

%run gem_helpers.ipynb
get_from_cat(cat.search(uri="refe.*ICMG.*"), ["simulation_id", "uri"])

	simulation_id	uri
0	HQYS	reference::/work/bm1235/a270046/cycle2-sync/tco2559-ng5/ICMGGall_update/json.dir/atm2d.json
1	HR0N	reference::/work/bm1235/a270046/cycle2-sync/tco3999-ng5/ICMGGc2/json.dir/atm2d.json
2	HR2N	reference::/work/bm1235/a270046/cycle2-sync/tco1279-orca025/nemo_deep/ICMGGc2/json.dir/atm2d.json
3	HR2N_nodeep	reference::/work/bm1235/a270046/cycle2-sync/tco1279-orca025/nemo/ICMGGc2/json.dir/atm2d.json

import os

for x in get_list_from_cat(cat.search(uri="refe.*"), "uri"):
    if not os.access(x[11:], os.R_OK):
        print(x[11:])

# Additional imports for this example (yes, we are generous)
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datashader as ds
from datashader.mpl_ext import dsshow
import sys
import re
import os
import getpass

# map projections
import cartopy.crs as crs

# more colormaps
import cmocean

import warnings, matplotlib

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)

A bunch of helper functions and temporary directories.

def fix_time_axis(data):
    """Turn icon's yyyymmdd.f time axis into actual datetime format.

    This will fail for extreme values, but should be fine for a few centuries around today.
    """
    if (data.time.dtype != "datetime64[ns]") and (
        data["time"].units == "day as %Y%m%d.%f"
    ):
        data["time"] = pd.to_datetime(
            ["%8i" % x for x in data.time], format="%Y%m%d"
        ) + pd.to_timedelta([x % 1 for x in data.time.values], unit="d")


def get_from_cat(catalog, columns):
    """A helper function for inspecting an intake catalog.

    Call with the catalog to be inspected and a list of columns of interest."""

    if type(columns) == type(""):
        columns = [columns]
    return catalog.df[columns].drop_duplicates().sort_values(columns)


# make temporary directories in scratch
uid = getpass.getuser()
image_path = f"/scratch/{uid[0]}/{uid}/intake_demo_plots"
os.makedirs(image_path, exist_ok=True)
data_cache_path = f"/scratch/{uid[0]}/{uid}/intake_demo_data"
os.makedirs(data_cache_path, exist_ok=True)

Preparing the data: Putting the data on a grid#

For ICON Data, we can use the ‘grid_file_uri’ to access the grid. But we know where the file is saved locally and don’t need to download it via the internet. For IFS Data, the grid is served with the data.

# to plot, we need to associate a grid file with the data. The easiest way is to get the info from the grid_file_uri attribute in the data.
data_dict = {}
for name, dataset in dataset_dict.items():
    print(name)
    if "ICON-ESM" in name:  # ICON needs a bit of cleanup.
        grid_file_path = "/pool/data/ICON" + dataset.grid_file_uri.split(".de")[1]
        grid_data = xr.open_dataset(grid_file_path).rename(
            cell="ncells",  # the dimension has different names in the grid file and in the output.
            clon="lon",  # standardize the coordinate names.
            clat="lat",  # standardize the coordinate names.
        )
        data = xr.merge((dataset, grid_data))
        fix_time_axis(data)
        data_dict[name] = data
    else:
        data_dict[name] = dataset

nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR0N.atm.1hour.inst-or-acc.gn.2d
nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HQYS.atm.1hour.inst-or-acc.gn.2d
nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR2N.atm.1hour.inst-or-acc.gn.2d
nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR2N_nodeep.atm.1hour.inst-or-acc.gn.2d
nextGEMS.MPI-M.ICON-ESM.nextgems_cycle2.ngc2009.atm.30minute.inst.gn.ml

Fast plot using datashader#

Using datashader is much faster than the raw plot functions from matplotlib. Here we project the data once using cartopy. This can be skipped for simple lat/lon plots.

# Very fast plotting on projected maps with Pandas and Datashader


def transform_coords(data, projection):
    """Projects coordinates of the dataset into the desired map projection.

    Expects data.lon to be the longitudes and data.lat to be the latitudes

    """
    lon = data.lon
    lat = data.lat
    if "rad" in data.lon.units:  # icon comes in radian, ifs in degree.
        lon = np.rad2deg(data.lon)
        lat = np.rad2deg(data.lat)
    coords = (
        projection.transform_points(  # This re-projects our data to the desired grid.
            crs.Geodetic(), lon, lat
        )
    )
    return coords


def plot_map(
    data,
    projection,
    coords=None,  # we can compute them ourselves, if given data with lat and lon attached.
    colorbar_label="",
    title="",
    coastlines=True,
    extent=None,
    **kwargs,
):
    """Use datashader to plot a dataset from a pandas dataframe with a given projection.

    Required: data and projection.
    All other arguments are optional. Additional arguments will be passed directly to the plot routine (dsshow).
    """

    # If we are not given projected data, we project it ourselves.
    if coords is None:
        coords = projection.transform_points(crs.Geodetic(), data.lon, data.lat)

    # For datashader, we need the data in a pandas dataframe object.
    df = pd.DataFrame(
        data={
            "val": np.squeeze(
                data
            ),  # here we select which data to plot! - the squeeze removes empty dimensions.
            "x": coords[:, 0],  # from the projection above
            "y": coords[:, 1],  # from the projection above
        }
    )

    # the basis for map plotting.
    fig, ax = plt.subplots(figsize=(10, 4), subplot_kw={"projection": projection})
    fig.canvas.draw_idle()  # necessary to make things work

    # the plot itself
    artist = dsshow(df, ds.Point("x", "y"), ds.mean("val"), ax=ax, **kwargs)

    # making things pretty
    plt.title(title)
    if coastlines:
        ax.coastlines(color="grey")
    fig.colorbar(artist, label=colorbar_label)

    # for regional plots:
    if extent is not None:
        ax.set_extent(extent)

    return fig

projection = (
    crs.Mollweide()
)  # We will plot in mollweide projection - needs to match the plot calls later.

# We store all coordinates in a dictionary for later use.
coords_dict = (
    {}
)  ### WARNING THIS WILL BECOME BIG IF YOU DO IT FOR MORE THAN A FEW DATASETS!

for name, data in data_dict.items():
    coords_dict[name] = transform_coords(data, projection)

# Plotting the projected data
time = "2020-02-28T12:00:00"
# level = 0 # there is only level 0 in tas

for name, data in data_dict.items():
    var = data.intake_esm_varname[
        0
    ]  # variable names vary a bit by model. Here we get the correct one.
    timestr = str(data.time.sel(time=time).values)[
        :-10
    ]  # take the correct time and remove trailing zeros
    fig = plot_map(
        data=data[var].sel(time=time),  # our data for the plot
        projection=projection,  # generated in the cell above.
        coords=coords_dict[name],  # generated in the cell above.
        cmap=cmocean.cm.thermal,  # nice colorbar for temperature
        vmin=240,  # minimum for the plot ~-33C
        vmax=310,  # maximum for the plot ~37C
        colorbar_label="near-surface air temperature in K",
        title=f"{name}\n{timestr}",  # dataset name and time stamp.
    )
    filename = f"{image_path}/{var}_datashader_mollweide_{name}_{re.sub(':','',timestr)}.png"  # save to /scratch/
    print("saving to ", filename)
    plt.savefig(filename)

saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR0N.atm.1hour.inst-or-acc.gn.2d_2020-02-28T120000.png
saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HQYS.atm.1hour.inst-or-acc.gn.2d_2020-02-28T120000.png
saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR2N.atm.1hour.inst-or-acc.gn.2d_2020-02-28T120000.png
saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR2N_nodeep.atm.1hour.inst-or-acc.gn.2d_2020-02-28T120000.png
saving to  /scratch/k/k202134/intake_demo_plots/tas_datashader_mollweide_nextGEMS.MPI-M.ICON-ESM.nextgems_cycle2.ngc2009.atm.30minute.inst.gn.ml_2020-02-28T120000.png

../../_images/d6acdb010de5a9675e066a30f22ccc770858ad8eeb0e6c25dfd16e6130f6e710.png

../../_images/20a1a44ec4bd984ca3e0131a59a0203c4514756cb42ac5626284e13f0292cc6f.png

../../_images/025acad1c745acd7c9c20cf2afa660eb1ee142fd2d63f89f7ff8e5fbdde6d898.png

../../_images/9823ef7d8e2f11650f9bf277198cd62e5e105aa7952a6a47d906638c2b189e07.png

../../_images/77344ce47f4e3dc77950912fd6af72bcc8df83c77d1db3d1c23d1030f5743ab4.png

Regional plots just require one additional argument#

# Plotting the projected data
time = "2020-02-28T12:00:00"
level = 0  # there is only level 0 in tas, but we still need to provide it
for name, data in data_dict.items():
    var = data.intake_esm_varname[
        0
    ]  # variable names vary a bit by model. Here we get the correct one.
    timestr = str(data.time.sel(time=time).values)[:-10]
    print(timestr)
    fig = plot_map(
        data=data[var].sel(time=time),  # our data for the plot
        projection=projection,  # generated in the cell above.
        coords=coords_dict[name],  # generated in the cell above.
        cmap=cmocean.cm.thermal,  # nice colorbar for temperature
        vmin=240,  # minimum for the plot ~-33C
        vmax=310,  # maximum for the plot ~37C
        colorbar_label="near-surface air temperature in K",
        title=f"{name}\n{timestr}",  # dataset name and time stamp.
        extent=[-20, 45, 25, 70],  # HERE WE CHOSE THE EXTENT
    )
    filename = f"{image_path}/{var}_datashader_mollweide_{name}_subset_{re.sub(':','',timestr)}.png"
    print("saving to ", filename)
    plt.savefig(filename)

2020-02-28T12:00:00
saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR0N.atm.1hour.inst-or-acc.gn.2d_subset_2020-02-28T120000.png
2020-02-28T12:00:00
saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HQYS.atm.1hour.inst-or-acc.gn.2d_subset_2020-02-28T120000.png
2020-02-28T12:00:00
saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR2N.atm.1hour.inst-or-acc.gn.2d_subset_2020-02-28T120000.png
2020-02-28T12:00:00
saving to  /scratch/k/k202134/intake_demo_plots/2t_datashader_mollweide_nextGEMS.ECMWF-AWI.IFS-FESOM.nextgems_cycle2.HR2N_nodeep.atm.1hour.inst-or-acc.gn.2d_subset_2020-02-28T120000.png
2020-02-28T12:00:00
saving to  /scratch/k/k202134/intake_demo_plots/tas_datashader_mollweide_nextGEMS.MPI-M.ICON-ESM.nextgems_cycle2.ngc2009.atm.30minute.inst.gn.ml_subset_2020-02-28T120000.png

../../_images/5d78724688ff6cbabb422873e0addf1af9713db8b240eea040579c436e2f869e.png

../../_images/4b7024c3c09cd369120e6c977a45352cb653f1c37ec62fde87521c62830dfea2.png

../../_images/498014542e1c564e8104e48555bb98a549c8c4e87ee4b44f7457749413c49a38.png

../../_images/0febf73218b451f1b43a4f317a98a64a6f13b7384fa74febb71c744a2d3cad60.png

../../_images/d260de41613d502f866e082acc2655e51138ddca48e7cc5e7d83aeb9642470dd.png

Map Plotting with Intake and datashader#

Preparing the data: Putting the data on a grid#

Fast plot using datashader#

Regional plots just require one additional argument#

This Page