Climate Attribute Development - NASA Daymet#

../_images/animated_years.gif

Fig. 11 NASA Daymet provides spatially distributed time series climate data covering North America, spanning 1980 to 2022.#

Accessing and Registering for NASA Daymet Data#

NASA's Daymet provides daily surface weather and climatological summaries for North America. To access and automate the download of Daymet data, follow these steps:

  1. Register: Before you can download data, you need to register with the ORNL DAAC.

  2. Access the Data: Once registered, navigate to the Daymet Data Collection page, where you can explore the available datasets.

  3. Automated Download: For automated downloads, use the Daymet web services; detailed instructions and examples are provided in the Daymet documentation. A minimal download sketch follows this list.
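
One way to script the download is to request files directly from the ORNL THREDDS fileServer endpoint used later in this notebook. This is a minimal sketch, not an official client: the tile ID, year, and variable shown are placeholder values, and the URL pattern is the same daily-tile (collection 2129) pattern used in the wget commands below.

import os
import requests

# THREDDS fileServer base URL for the daily Daymet tile collection (2129),
# matching the URL pattern used in the download loop later in this notebook
daymet_url_base = 'https://thredds.daac.ornl.gov/thredds/fileServer/ornldaac/2129/tiles/'

def download_tile(tile_id, year, param, out_dir='data/daymet_data'):
    """Download a single Daymet tile NetCDF file (hypothetical helper)."""
    url = f'{daymet_url_base}{year}/{tile_id}_{year}/{param}.nc'
    os.makedirs(out_dir, exist_ok=True)
    out_fpath = os.path.join(out_dir, f'{tile_id}_{year}_{param}.nc')
    # stream the response so large files are not held entirely in memory
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(out_fpath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    return out_fpath

# example (placeholder tile ID): download_tile('12270', 1980, 'prcp')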

Available climate variables are described in the Daymet Catalogue:

| Variable | Description | Units |
| --- | --- | --- |
| tmax | Daily maximum 2-meter air temperature | °C |
| tmin | Daily minimum 2-meter air temperature | °C |
| prcp | Daily total precipitation | mm |
| srad | Incident shortwave radiation flux density | \(W/m^2\) |
| vp | Water vapor pressure | Pa |
| swe | Snow water equivalent | \(kg/m^2\) |
| dayl | Duration of the daylight period | seconds/day |
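
Each variable is delivered as one NetCDF file per tile and year. As a quick sanity check, a downloaded file can be opened with xarray to confirm its dimensions and metadata; the path below is a hypothetical example following the {tile}_{year}_{param}.nc naming used later in this notebook.

import xarray as xr

# hypothetical local path following the {tile}_{year}_{param}.nc naming convention used below
sample_fpath = 'data/daymet_data/prcp/12270_1980_prcp.nc'

ds = xr.open_dataset(sample_fpath)
print(ds)               # dimensions (time, y, x), coordinates, and global attributes
print(ds['prcp'].attrs) # variable metadata, including units and long_name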

import os 
import numpy as np
import geopandas as gpd
import xarray as xr
import rioxarray as rxr
import pandas as pd

import multiprocessing as mp

# helper functions provided alongside this notebook
from daymet_processing_functions import *
region = 'Vancouver_Island'
base_dir = os.path.dirname(os.getcwd())
polygon_path = os.path.join(os.getcwd(), f'data/region_polygons/{region}.geojson')
region_polygon = gpd.read_file(polygon_path)
# Daymet native Lambert Conformal Conic projection
daymet_proj = '+proj=lcc +lat_1=25 +lat_2=60 +lat_0=42.5 +lon_0=-100 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs'
# local folder where the raw Daymet tiles are stored
daymet_folder = '/media/danbot/Samsung_T51/large_sample_hydrology/common_data/DAYMET'
# import the daymet tile index
tile_fpath = os.path.join(base_dir, 'notebooks/data/daymet_data/Daymet_v4_Tiles.geojson')
dm_tiles = gpd.read_file(tile_fpath)
print(f'daymet tile index is in {dm_tiles.crs}')

# get the intersection with the region polygon
tiles_df = dm_tiles.sjoin(region_polygon)
tiles_df = tiles_df.sort_values(by=['Latitude (Min)', 'Longitude (Min)'])
tile_rows = tiles_df.groupby('Latitude (Min)')['TileID'].apply(list).tolist()
tile_ids = tiles_df['TileID'].values
print(f'There are {len(tile_ids)} tiles covering the region.')
# save the dataframe of relevant tiles to a .shp file 
# gdal is used to clip the spatial datasets with a .shp mask.
# ensure the mask is in the same crs as the tiles.
reproj_mask_path = polygon_path.replace('.geojson', '_daymet_mask.shp')
if not os.path.exists(reproj_mask_path):
    mask = region_polygon.to_crs(daymet_proj)
    mask.to_file(reproj_mask_path)
# * 2129: Daily
# * 2131: Monthly average
# * 2130: Annual average
code = 2129

daymet_params = ['prcp', 'tmax', 'tmin', 'srad', 'swe', 'vp']
computed_daymet_params = ['high_prcp_freq','low_prcp_freq', 'high_prcp_duration', 'low_prcp_duration']

years = list(range(1980,2023))

daymet_url_base = f'https://thredds.daac.ornl.gov/thredds/fileServer/ornldaac/{code}/tiles/'                
base_command = f'wget -q --show-progress --progress=bar:force --limit-rate=3m {daymet_url_base}'


code_dict = {
    2129: 'daily', 2130: 'annual', 2131: 'monthly',
}

batch_commands = []
for yr in years:
    for param in daymet_params:
        for tile in tile_ids:
            param_dir = os.path.join(daymet_folder, param)
            if not os.path.exists(param_dir):
                os.mkdir(param_dir)

            file_path = f'{yr}/{tile}_{yr}/{param}.nc'
            save_fpath = os.path.join(daymet_folder, f'{param}/{tile}_{yr}_{param}.nc')

            if not os.path.exists(save_fpath):
                cmd = base_command + f'{file_path} -O {save_fpath}'
                batch_commands.append(cmd)
                

# download the files in parallel
print(f'{len(batch_commands)} files to download')
# with mp.Pool() as pl:
#     pl.map(os.system, batch_commands)
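
The parallel download is left commented out so the notebook does not re-download tiles on every run. If you do run it, a variant that checks return codes makes failed downloads easier to spot. This is a sketch built on the batch_commands list defined above; it uses a thread pool, which is sufficient here because the work is network-bound.

import subprocess
from concurrent.futures import ThreadPoolExecutor

def run_download(cmd):
    """Run one wget command and return its exit code (sketch; uses batch_commands from above)."""
    return cmd, subprocess.run(cmd, shell=True).returncode

# with ThreadPoolExecutor(max_workers=4) as ex:
#     for cmd, rc in ex.map(run_download, batch_commands):
#         if rc != 0:
#             print(f'download failed: {cmd}')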

Process NASA DAYMET Data#

To process the NASA Daymet data spanning 1980 to 2022 for the six parameters ('tmax', 'tmin', 'prcp', 'srad', 'swe', 'vp') within a set of polygons, we use the following approach:

  1. Data Preparation: Merge the .nc (NetCDF) tiles into an xarray dataset.

  2. Temporal Statistics: For each year and parameter:
    a. Load the relevant .nc files using xarray,
    b. Compute the annual mean/max/total values (using the resample method on the time coordinate),
    c. Compute the mean annual value (the mean over years),
    d. Write the result to a raster (tif) file.

  3. Raster Crop: Use gdalwarp to crop the output raster to the region.

  4. Attribute Extraction: For the set of basins generated in Notebook 4, mask the output rasters to derive 6 climate indices per basin (a minimal masking sketch follows this list).
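
Step 4 (attribute extraction) is carried out against the basin polygons generated in Notebook 4 and is not shown in the code below. A minimal sketch of that masking step, assuming hypothetical basin and mosaic file paths and a hypothetical 'basin_id' column, clips a parameter mosaic to each basin polygon with rioxarray and takes the spatial mean:

import geopandas as gpd
import rioxarray as rxr

# hypothetical inputs: basin polygons from Notebook 4 and one parameter mosaic produced below
basins = gpd.read_file('data/basin_polygons/basins.geojson')
tmax_mosaic = rxr.open_rasterio('data/daymet_data/tmax_mosaic_3005.tiff', masked=True)

# reproject the basins to the raster CRS (EPSG:3005) before clipping
basins = basins.to_crs(tmax_mosaic.rio.crs)

basin_means = {}
for _, basin in basins.iterrows():
    # clip the mosaic to the basin polygon and take the spatial mean
    clipped = tmax_mosaic.rio.clip([basin.geometry], basins.crs, drop=True)
    basin_means[basin['basin_id']] = float(clipped.mean().values)  # 'basin_id' is a hypothetical column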

def _open_dataset(f, grp=None):
    """Open a dataset using ``xarray``.
    From pydaymet: https://github.com/hyriver/pydaymet/blob/44ca8be043f2ac27bb815ac3e70e33094da22730/pydaymet/pydaymet.py
    """
    with xr.open_dataset(f, engine="netcdf4", chunks={},
                         mask_and_scale=True, cache=False) as ds:
        return ds.load()


def resample_annual(param, tid, output_fpath):
    """
    Adapted from pydaymet get_bygeom()
    """
    param_folder = os.path.join(daymet_folder, param)
    clm_files = sorted([os.path.join(param_folder, e) for e in os.listdir(param_folder)
                        if e.startswith(tid) and not e.endswith('.xml')])

    ds = xr.concat((_open_dataset(f) for f in clm_files), dim='time')[param]
    #  write crs BEFORE AND AFTER resampling!
    ds.rio.write_nodata(np.nan, inplace=True)
    ds = ds.rio.write_crs(daymet_proj)

    if param in ['prcp']:
        # note that sum handles nan values differently than max and mean
        # explicitly set skipna=False or the assembled mosaic will be wonky
        ds = ds.resample(time='1y').sum(keep_attrs=True, skipna=False)
    elif param == 'swe':
        # HYSETS uses average annual maximum
        ds = ds.resample(time='1y').max(keep_attrs=True)
    else:
        ds = ds.resample(time='1y').mean(keep_attrs=True)

    annual_mean = ds.mean('time', keep_attrs=True)
    annual_mean = annual_mean.rio.write_crs(daymet_proj)
    annual_mean.rio.write_nodata(np.nan, inplace=True)
    annual_mean.rio.to_raster(output_fpath)
    
def create_tile_mosaic(param, reproj_fpath, file_pattern=None):
    """Mosaic the per-tile rasters for a parameter and warp/crop the result to the region."""
    data_folder = 'data/daymet_data'

    if file_pattern is None:
        file_pattern = f'*_{param}_mean_annual.tiff'
    vrt_fpath = os.path.join(data_folder, f'{param}_mosaic.vrt')

    # assemble the tile rasters into a virtual mosaic
    vrt_cmd = f'gdalbuildvrt {vrt_fpath} {data_folder}/{param}/{file_pattern}'
    # warp the mosaic to EPSG:3005 and crop it to the region mask
    warp_cmd = f'gdalwarp -multi -cutline {reproj_mask_path} -crop_to_cutline -wo CUTLINE_ALL_TOUCHED=TRUE -t_srs EPSG:3005 -co TILED=YES -co BIGTIFF=YES -co COMPRESS=DEFLATE -co NUM_THREADS=ALL_CPUS {vrt_fpath} {reproj_fpath}'
    if not os.path.exists(reproj_fpath):
        os.system(vrt_cmd)
        os.system(warp_cmd)
        os.remove(vrt_fpath)

Create raster (tif) files#

Given NetCDF inputs describing the six parameters at daily frequency, summarize each parameter down to a single spatial layer (the mean annual value), which is then clipped to the region polygon.

for param in daymet_params:
    print(f'processing {param}')
    param_folder = os.path.join(daymet_folder, param)
    for tid in tile_ids:
        output_file = f'{tid}_{param}_mean_annual.tiff'
        folder = f'data/daymet_data/{param}/'
        if not os.path.exists(folder):
            os.makedirs(folder)
        output_fpath = os.path.join(folder, output_file)
        if not os.path.exists(output_fpath):
            try:
                resample_annual(param, tid, output_fpath)
            except Exception as ex:
                print(f'Resampling failed on {param} tile id: {tid}')
                print(ex)
                continue
    reproj_fpath = os.path.join('data/daymet_data/', f'{param}_mosaic_3005.tiff')
    if not os.path.exists(reproj_fpath):
        create_tile_mosaic(param, reproj_fpath)
    

Compute derived climate indices#

# compute low and high precip event frequency and duration
for param in computed_daymet_params:
    output_fpath = os.path.join('data/daymet_data/', f'{param}_mosaic_3005.tiff')
    if os.path.exists(output_fpath):
        print(f'{param} mosaic file already processed.')
        continue
    # write the derived-index tiles to the same per-parameter folder scanned by create_tile_mosaic
    param_folder = f'data/daymet_data/{param}/'
    if not os.path.exists(param_folder):
        os.makedirs(param_folder)
    for tid in tile_ids:
        print(f'Processing tile id {tid}.')
        mean_annual_fpath = os.path.join(param_folder, f'{tid}_{param}_mean_annual.tiff')
        if os.path.exists(mean_annual_fpath):
            print(f'   ...tile id {tid} already processed.')
        else:
            set_computation_by_param(tid, param, mean_annual_fpath)
            print(f'   ...finished processing mean annual {param} for tile id {tid}.')
    
    file_pattern = f'*_{param}_mean_annual.tiff'
    create_tile_mosaic(param, output_fpath, file_pattern)
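
The set_computation_by_param helper is provided by daymet_processing_functions and is not shown here. As a rough illustration of how one of these indices could be derived from the daily prcp tiles, the sketch below counts, for each grid cell, the mean annual number of days with precipitation at least five times the long-term mean daily precipitation. The 5x threshold follows the common CAMELS-style convention and is an assumption; it is not necessarily the definition used by set_computation_by_param, and the other indices (dry-day frequency and event durations) would need analogous logic.

import xarray as xr

def high_prcp_freq_sketch(prcp_files):
    """Mean annual count of days with prcp >= 5 x mean daily prcp (assumed definition)."""
    # concatenate one tile's yearly precipitation files along the time dimension
    ds = xr.concat([xr.open_dataset(f)['prcp'] for f in prcp_files], dim='time')
    # long-term mean daily precipitation for each grid cell
    mean_daily = ds.mean('time')
    # flag days exceeding the 5x threshold, then count the flagged days in each year
    high_days = (ds >= 5 * mean_daily).resample(time='1y').sum()
    # average the annual counts to get a mean annual frequency (days per year)
    return high_days.mean('time')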