Source code for NEDAS.models.nextsim.dg.restart

from datetime import datetime
import os
import typing
import threading

import netCDF4 # type: ignore
import numpy as np
import pyproj # type: ignore

from NEDAS.grid import RegularGrid
from NEDAS.models.nextsim.dg.perturb import gen_perturb, apply_perturb

_proj:pyproj.Proj = pyproj.Proj(proj='stere', a=6378273, b=6356889.448910593, lat_0=90., lon_0=-45., lat_ts=60.)

thread_lock = threading.Lock()
[docs] def read_var(fname:str, varnames:list[str]) -> np.ma.MaskedArray: """reading a variable from a netcdf file Parameters ---------- fname : str forcing file name varnames : list[str] list of variable names itime : int time index in the forcing file """ data: list[np.ndarray] = [] with thread_lock: with netCDF4.Dataset(fname, 'r') as f: # read the variable for vname in varnames: data.append(f[vname][:]) return np.ma.array(data)
[docs] def write_var(fname:str, varnames: list[str], data: np.ndarray) -> None: """Write the perturbed variable back to the forcing file Parameters ---------- fname : str forcing file name varnames : list[str] list of variable names itime : int time index in the forcing file """ # We assume all variables in the forcing file exists assert os.path.exists(fname), f'{fname} does not exist; Please copy the forcing file to the correct path first.' with thread_lock: with netCDF4.Dataset(fname, 'r+') as f: for i, vname in enumerate(varnames): f[vname][:] = data[i] f.sync()
[docs] def get_restart_filename(file_options:dict, i_ens: int, time: datetime) -> str: # get the restart file name fname: str try: fname = file_options['format'].format(i=i_ens, time=time.strftime(file_options['time_format'])) except KeyError: try: fname = file_options['format'].format(i=i_ens, time=time.strftime(file_options['time_format'])) except KeyError: raise RuntimeError('Currently, we only supports keyword of 1. "time_format",' '2. "time_format"+"i".' 'See the example yaml file for more information. ' 'Modified the code if you have other requirements.') return fname
[docs] def perturb_restart(restart_options:dict, file_options:dict, debug=False) -> None: """perturb the initial conditions in restart files Parameters ---------- restart_options : dict perturbation options under the section of `perturb` from the yaml file file_options : dict This dictionary is constructed before the function is called. Keys: fname (str, path to the file to perturb), lon_name (str, longitude variable name from the restart section of the model config), lat_name (str, latitude variable name from the restart section of the model config). """ # perturbation arrays pert: np.ndarray[typing.Any, np.dtype[np.float64]] # get the restart file name fname:str = file_options['fname'] # get grid object with thread_lock: with netCDF4.Dataset(fname, 'r') as f: lon = f[file_options['lon_name']][:] lat = f[file_options['lat_name']][:] x, y = _proj(lon, lat) grid = RegularGrid(_proj, x, y) # get options for perturbing the forcing variables options = restart_options['variables'] for i, varname in enumerate(options['names']): if typing.TYPE_CHECKING: assert type(varname) == str, 'variable name must be a string' # convert the horizontal correlation length scale to grid points hcorr:int = np.rint(float(options['hcorr'][i])/grid.dx) # do perturbation pert = gen_perturb(grid, options['type'][i], float(options['amp'][i]), hcorr) # apply perturbations to the variable data # in the case of vector fields, we need to split the variable name varname_list: list[str] = varname.split(';') # read the variable data from forcing file data: np.ndarray = read_var(fname, varname_list) # apply perturbations to the variable data data = apply_perturb(grid, data, pert, options['type'][i]) # apply lower and upper bounds lb = float(options['lower_bounds'][i]) if options['lower_bounds'][i] != 'None' else -np.inf ub = float(options['upper_bounds'][i]) if options['upper_bounds'][i] != 'None' else np.inf data = np.minimum(np.maximum(data, lb), ub) # write the perturbed variable back to the forcing file write_var(fname, varname_list, data)