diff --git a/mhkit/wave/io/hindcast/hindcast.py b/mhkit/wave/io/hindcast/hindcast.py new file mode 100644 index 000000000..69787806a --- /dev/null +++ b/mhkit/wave/io/hindcast/hindcast.py @@ -0,0 +1,286 @@ +import numpy as np +import pandas as pd +from rex import MultiYearWaveX, WaveX +import sys +from time import sleep + + + +def region_selection(lat_lon): + ''' + Returns the name of the predefined region in which the given coordinates reside. + Can be used to check if the passed lat/lon pair is within the WPTO hindcast dataset. + + Parameters + ---------- + lat_lon : list or tuple + Latitude and longitude coordinates as flaots or integers + + Returns + ------- + region : string + Name of predefined region for given coordinates + ''' + assert isinstance(lat_lon, (list,tuple)), 'lat_lon must be of type list or tuple' + assert isinstance(lat_lon[0], (float,int)), 'lat_lon values must be of type float or int' + assert isinstance(lat_lon[1], (float,int)), 'lat_lon values must be of type float or int' + + rDict = { + 'Hawaii':{ 'lat':[15.0,27.000002], 'lon':[-164.0,-151.0] }, + 'West_Coast':{ 'lat':[30.0906, 48.8641], 'lon':[-130.072, -116.899] }, + 'Atlantic':{ 'lat':[24.382, 44.8247], 'lon':[-81.552, -65.721] }, + } + + region_search = lambda x: all( ( True if rDict[x][dk][0] <= d <= rDict[x][dk][1] else False + for dk, d in {'lat':lat_lon[0],'lon':lat_lon[1]}.items() ) ) + region = [key for key in rDict if region_search(key)] + + if len(region)==0: + print('ERROR: coordinates out of bounds') + else: + return region[0] + + + +def request_wpto_point_data(data_type, parameter, lat_lon, years, tree=None, + unscale=True, str_decode=True,hsds=True): + + """ + Returns data from the WPTO wave hindcast hosted on AWS at the specified latitude and longitude point(s), + or the closest available point(s). + Visit https://registry.opendata.aws/wpto-pds-us-wave/ for more information about the dataset and available + locations and years. + + Note: To access the WPTO hindcast data, you will need to configure h5pyd for data access on HSDS. + Please see the WPTO_hindcast_example notebook for more information. + + Parameters + ---------- + data_type : string + Data set type of interest + Options: '3-hour' '1-hour' + parameter: string or list of strings + Dataset parameter to be downloaded + 3-hour dataset options: 'directionality_coefficient', 'energy_period', 'maximum_energy_direction' + 'mean_absolute_period', 'mean_zero-crossing_period', 'omni-directional_wave_power', 'peak_period' + 'significant_wave_height', 'spectral_width', 'water_depth' + 1-hour dataset options: 'directionality_coefficient', 'energy_period', 'maximum_energy_direction' + 'mean_absolute_period', 'mean_zero-crossing_period', 'omni-directional_wave_power', 'peak_period' + 'significant_wave_height', 'spectral_width', 'water_depth', 'maximim_energy_direction', + 'mean_wave_direction', 'frequency_bin_edges' + lat_lon: tuple or list of tuples + Latitude longitude pairs at which to extract data + years : list + Year(s) to be accessed. The years 1979-2010 available. Examples: [1996] or [2004,2006,2007] + tree : str | cKDTree (optional) + cKDTree or path to .pkl file containing pre-computed tree + of lat, lon coordinates, default = None + unscale : bool (optional) + Boolean flag to automatically unscale variables on extraction + Default = True + str_decode : bool (optional) + Boolean flag to decode the bytestring meta data into normal + strings. Setting this to False will speed up the meta data read. + Default = True + hsds : bool (optional) + Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS + behind HSDS. Setting to False will indicate to look for files on + local machine, not AWS. Default = True + + Returns + --------- + data: DataFrame + Data indexed by datetime with columns named for parameter and cooresponding metadata index + meta: DataFrame + Location metadata for the requested data location + """ + + assert isinstance(parameter, (str, list)), 'parameter must be of type string or list' + assert isinstance(lat_lon, (list,tuple)), 'lat_lon must be of type list or tuple' + assert isinstance(data_type, str), 'data_type must be a string' + assert isinstance(years,list), 'years must be a list' + assert isinstance(tree,(str,type(None))), 'tree must be a string' + assert isinstance(unscale,bool), 'unscale must be bool type' + assert isinstance(str_decode,bool), 'str_decode must be bool type' + assert isinstance(hsds,bool), 'hsds must be bool type' + + if 'directional_wave_spectrum' in parameter: + sys.exit('This function does not support directional_wave_spectrum output') + + # check for multiple region selection + if isinstance(lat_lon[0], float): + region = region_selection(lat_lon) + else: + reglist = [] + for loc in lat_lon: + reglist.append(region_selection(loc)) + if reglist.count(reglist[0]) == len(lat_lon): + region = reglist[0] + else: + sys.exit('Coordinates must be within the same region!') + + if data_type == '3-hour': + wave_path = f'/nrel/US_wave/'+region+'/'+region+'_wave_*.h5' + elif data_type == '1-hour': + wave_path = f'/nrel/US_wave/virtual_buoy/'+region+'/'+region+'_virtual_buoy_*.h5' + else: + print(f'ERROR: invalid data_type') + pass + waveKwargs = {'tree':tree,'unscale':unscale,'str_decode':str_decode, 'hsds':hsds, + 'years':years} + data_list = [] + + with MultiYearWaveX(wave_path, **waveKwargs) as rex_waves: + if isinstance(parameter, list): + for p in parameter: + temp_data = rex_waves.get_lat_lon_df(p,lat_lon) + col = temp_data.columns[:] + for i,c in zip(range(len(col)),col): + temp = f'{p}_{i}' + temp_data = temp_data.rename(columns={c:temp}) + + data_list.append(temp_data) + data= pd.concat(data_list, axis=1) + + else: + data = rex_waves.get_lat_lon_df(parameter,lat_lon) + col = data.columns[:] + + for i,c in zip(range(len(col)),col): + temp = f'{parameter}_{i}' + data = data.rename(columns={c:temp}) + + meta = rex_waves.meta.loc[col,:] + meta = meta.reset_index(drop=True) + return data, meta + +def request_wpto_directional_spectrum(lat_lon, year, tree=None, + unscale=True, str_decode=True,hsds=True): + + """ + Returns directional spectra data from the WPTO wave hindcast hosted on AWS at the specified latitude and longitude point(s), + or the closest available point(s). + Visit https://registry.opendata.aws/wpto-pds-us-wave/ for more information about the dataset and available + locations and years. + + Note: To access the WPTO hindcast data, you will need to configure h5pyd for data access on HSDS. + Please see the WPTO_hindcast_example notebook for more information. + + Parameters + ---------- + lat_lon: tuple or list of tuples + Latitude longitude pairs at which to extract data + year : string + Year to be accessed. The years 1979-2010 available. Only one year can be requested at a time. + tree : str | cKDTree (optional) + cKDTree or path to .pkl file containing pre-computed tree + of lat, lon coordinates, default = None + unscale : bool (optional) + Boolean flag to automatically unscale variables on extraction + Default = True + str_decode : bool (optional) + Boolean flag to decode the bytestring meta data into normal + strings. Setting this to False will speed up the meta data read. + Default = True + hsds : bool (optional) + Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS + behind HSDS. Setting to False will indicate to look for files on + local machine, not AWS. Default = True + + Returns + --------- + data: xarray + Coordinates as datetime, frequency, and direction for data at specified location(s) + meta: DataFrame + Location metadata for the requested data location + """ + assert isinstance(lat_lon, (list,tuple)), 'lat_lon must be of type list or tuple' + assert isinstance(year,str), 'years must be a string' + assert isinstance(tree,(str,type(None))), 'tree must be a sring' + assert isinstance(unscale,bool), 'unscale must be bool type' + assert isinstance(str_decode,bool), 'str_decode must be bool type' + assert isinstance(hsds,bool), 'hsds must be bool type' + + # check for multiple region selection + if isinstance(lat_lon[0], float): + region = region_selection(lat_lon) + else: + reglist = [] + for loc in lat_lon: + reglist.append(region_selection(loc)) + if reglist.count(reglist[0]) == len(lat_lon): + region = reglist[0] + else: + sys.exit('Coordinates must be within the same region!') + + wave_path = f'/nrel/US_wave/virtual_buoy/'+region+'/'+region+'_virtual_buoy_'+year+'.h5' + parameter = 'directional_wave_spectrum' + waveKwargs = {'tree':tree,'unscale':unscale,'str_decode':str_decode, 'hsds':hsds} + + with WaveX(wave_path, **waveKwargs) as rex_waves: + # Get graphical identifier + gid = rex_waves.lat_lon_gid(lat_lon) + + # Setup index and columns + if isinstance(gid, (int, np.integer)): + columns = [gid] + else: + columns = gid + time_index = rex_waves.time_index + frequency = rex_waves['frequency'] + direction = rex_waves['direction'] + index = pd.MultiIndex.from_product( + [time_index, frequency, direction], + names=['time_index', 'frequency', 'direction'] + ) + + # Create bins for multiple smaller API dataset requests + N=6 + length = len(rex_waves) + quotient=length//N + remainder=length%N + bins = [i*quotient for i in range(N+1) ] + bins[-1]+= remainder + index_bins = (np.array(bins)*len(frequency)*len(direction)).tolist() + + # Request multiple datasets and add to dictionary + datas={} + for i in range(len(bins)-1): + idx=index[index_bins[i]:index_bins[i+1]] + + # Request with exponential back off wait time + sleep_time = 2 + num_retries = 4 + for x in range(0, num_retries): + try: + data_array = rex_waves[parameter, bins[i]:bins[i+1], :, :, gid] + str_error = None + + except Exception as e: + str_error = str(e) + + if str_error: + sleep(sleep_time) + sleep_time *= 2 + else: + break + + ax1 = np.product(data_array.shape[:3]) + ax2 = data_array.shape[-1] if len(data_array.shape) == 4 else 1 + data_array = data_array.reshape(ax1, ax2) + + df = pd.DataFrame(data_array, columns=columns, index=idx) + df.name = parameter + datas[i]=df + + # Append each request into an xarray + data_raw=datas[0] + for i in list(datas.keys())[1:]: + data_raw = pd.concat([data_raw,datas[i]]) + data = data_raw.to_xarray() + + # Get metadata + meta = rex_waves.meta.loc[columns,:] + meta = meta.reset_index(drop=True) + + return data, meta \ No newline at end of file diff --git a/mhkit/wave/io/hindcast/wind_toolkit.py b/mhkit/wave/io/hindcast/wind_toolkit.py new file mode 100644 index 000000000..bb0fbc83f --- /dev/null +++ b/mhkit/wave/io/hindcast/wind_toolkit.py @@ -0,0 +1,268 @@ +import pandas as pd +from rex import MultiYearWindX +import matplotlib.pyplot as plt + + +def region_selection(lat_lon, preferred_region=''): + ''' + Returns the name of the predefined region in which the given coordinates reside. + Can be used to check if the passed lat/lon pair is within the WIND Toolkit hindcast dataset. + + Parameters + ---------- + lat_lon : tuple + Latitude and longitude coordinates as floats or integers + + preferred_region : string (optional) + Latitude and longitude coordinates as floats or integers + + Returns + ------- + region : string + Name of predefined region for given coordinates + ''' + assert isinstance(lat_lon, tuple), 'lat_lon must be of type list or tuple' + assert len(lat_lon)==2, 'lat_lon must be of length 2' + assert isinstance(lat_lon[0], (float,int)), 'lat_lon values must be of type float or int' + assert isinstance(lat_lon[1], (float,int)), 'lat_lon values must be of type float or int' + assert isinstance(preferred_region, str), 'preferred_region must be of type string' + + # Note that this check is fast, but not robust because region are not + # rectangular on a lat-lon grid + rDict = { + 'CA_NWP_overlap':{'lat':[41.213, 42.642], 'lon':[-129.090, -121.672]}, + 'Offshore_CA':{ 'lat':[31.932, 42.642], 'lon':[-129.090, -115.806]}, + 'Hawaii':{ 'lat':[15.565, 26.221], 'lon':[-164.451, -151.278]}, + 'NW_Pacific':{ 'lat':[41.213, 49.579], 'lon':[-130.831, -121.672]}, + 'Mid_Atlantic':{ 'lat':[37.273, 42.211], 'lon':[-76.427, -64.800]}, + } + + region_search = lambda x: all( ( True if rDict[x][dk][0] <= d <= rDict[x][dk][1] else False + for dk, d in {'lat':lat_lon[0],'lon':lat_lon[1]}.items() ) ) + region = [key for key in rDict if region_search(key)] + + if region[0] == 'CA_NWP_overlap': + if preferred_region == 'Offshore_CA': + region[0] = 'Offshore_CA' + elif preferred_region == 'NW_Pacific': + region[0] = 'NW_Pacific' + else: + raise TypeError(f"Preferred_region ({preferred_region}) must be 'Offshore_CA' or 'NW_Pacific' when lat_lon {lat_lon} falls in the overlap region") + + if len(region)==0: + raise TypeError(f'Coordinates {lat_lon} out of bounds. Must be within {rDict}') + else: + return region[0] + + +def plot_region(region,lat_lon=None,ax=None): + ''' + Visualizes the area that a given region covers. Can help users understand + the extent of a region since they are not all rectangular. + + Parameters + ---------- + region : string + Name of predefined region in the WIND Toolkit + Options: 'Offshore_CA','Hawaii','Mid_Atlantic','NW_Pacific' + lat_lon : couple (optional) + Latitude and longitude pair to plot on top of the chosen region. Useful + to inform accurate latitude-longitude selection for data analysis. + ax : matplotlib axes object (optional) + Axes for plotting. If None, then a new figure is created. + + Returns + --------- + ax : matplotlib pyplot axes + ''' + assert isinstance(region, str), 'region must be of type string' + assert region in ['Offshore_CA','Hawaii','Mid_Atlantic','NW_Pacific'], f'{region} not in list of supported regions' + + wind_path = '/nrel/wtk/'+region.lower()+'/'+region+'_*.h5' + windKwargs = {'tree':None, 'unscale':True, 'str_decode':True, 'hsds':True, + 'years':[2019]} + + # Get the latitude and longitude list from the region in rex + rex_wind = MultiYearWindX(wind_path, **windKwargs) + lats = rex_wind.lat_lon[:,0] + lons = rex_wind.lat_lon[:,1] + + # Plot the latitude longitude pairs + if ax is None: + fig, ax = plt.subplots() + ax.plot(lons,lats,'o',label=f'{region} region') + if lat_lon is not None: + ax.plot(lat_lon[1],lat_lon[0],'o',label='Specified lat-lon point') + ax.set_xlabel('Longitude (deg)') + ax.set_ylabel('Latitude (deg)') + ax.grid() + ax.set_title(f'Extent of the WIND Toolkit {region} region') + ax.legend() + + return ax + + +def elevation_to_string(parameter, elevations): + """ + Takes in a parameter (e.g. 'windspeed') and elevations (e.g. [20, 40, 120]) + and returns the formatted strings that are input to WIND Toolkit (e.g. windspeed_10m). + Does not check parameter against the elevation levels. This is done in request_wtk_point_data. + + Parameters + ---------- + parameter: string + Name of the WIND toolkit parameter. + Options: 'windspeed', 'winddirection', 'temperature', 'pressure' + elevations : list + List of elevations (float). + Values can range from approxiamtely 20 to 200 in increments of 20, depending + on the parameter in question. See Documentation for request_wtk_point_data + for the full list of available parameters. + + Returns + --------- + parameter_list: list + Formatted List of WIND Toolkit parameter strings + + """ + + assert isinstance(parameter,str) + assert isinstance(elevations,(float,list)) + assert parameter in ['windspeed','winddirection','temperature','pressure'] + + parameter_list = [] + for e in elevations: + parameter_list.append(parameter+'_'+str(e)+'m') + + return parameter_list + + +def request_wtk_point_data(time_interval, parameter, lat_lon, years, preferred_region='', + tree=None, unscale=True, str_decode=True,hsds=True): + """ + Returns data from the WIND Toolkit offshore wind hindcast hosted on AWS at the specified latitude and longitude point(s), + or the closest available point(s). + Visit https://registry.opendata.aws/nrel-pds-wtk/ for more information about the dataset and available + locations and years. + + Calls with multiple parameters must have the same time interval. Calls + with multiple locations must use the same region (use the plot_region function). + + Note: To access the WIND Toolkit hindcast data, you will need to configure h5pyd for data access on HSDS. + Please see the WTK_hindcast_example notebook for more information. + + Parameters + ---------- + time_interval : string + Data set type of interest + Options: '1-hour' '5-minute' + parameter: string or list of strings + Dataset parameter to be downloaded. Other parameters may be available. + This list is limited to those available at both 5-minute and 1-hour + time intervals for all regions. + Options: + 'precipitationrate_0m', 'inversemoninobukhovlength_2m', + 'relativehumidity_2m', 'surface_sea_temperature', + 'pressure_0m', 'pressure_100m', 'pressure_200m', + 'temperature_10m', 'temperature_20m', 'temperature_40m', + 'temperature_60m', 'temperature_80m', 'temperature_100m', + 'temperature_120m', 'temperature_140m', 'temperature_160m', + 'temperature_180m', 'temperature_200m', + 'winddirection_10m', 'winddirection_20m', 'winddirection_40m', + 'winddirection_60m', 'winddirection_80m', 'winddirection_100m', + 'winddirection_120m', 'winddirection_140m', 'winddirection_160m', + 'winddirection_180m', 'winddirection_200m', + 'windspeed_10m', 'windspeed_20m', 'windspeed_40m', + 'windspeed_60m', 'windspeed_80m', 'windspeed_100m', + 'windspeed_120m', 'windspeed_140m', 'windspeed_160m', + 'windspeed_180m', 'windspeed_200m' + lat_lon: tuple or list of tuples + Latitude longitude pairs at which to extract data. Use plot_region() or + region_selection() to see the corresponding region for a given location. + years : list + Year(s) to be accessed. The years 2000-2019 available (up to 2020 + for Mid-Atlantic). Examples: [2015] or [2004,2006,2007] + preferred_region : string (optional) + Region that the lat_lon belongs to ('Offshore_CA' or 'NW_Pacific'). + Required when a lat_lon point falls in both the Offshore California + and NW Pacific regions. Overlap region defined by + latitude = (41.213, 42.642) and longitude = (-129.090, -121.672). + Default = '' + tree : str | cKDTree (optional) + cKDTree or path to .pkl file containing pre-computed tree + of lat, lon coordinates, default = None + unscale : bool (optional) + Boolean flag to automatically unscale variables on extraction + Default = True + str_decode : bool (optional) + Boolean flag to decode the bytestring meta data into normal + strings. Setting this to False will speed up the meta data read. + Default = True + hsds : bool (optional) + Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS + behind HSDS. Setting to False will indicate to look for files on + local machine, not AWS. Default = True + + Returns + --------- + data: DataFrame + Data indexed by datetime with columns named for parameter and cooresponding metadata index + meta: DataFrame + Location metadata for the requested data location + """ + + assert isinstance(parameter, (str, list)), 'parameter must be of type string or list' + assert isinstance(lat_lon, (list,tuple)), 'lat_lon must be of type list or tuple' + assert isinstance(time_interval, str), 'time_interval must be a string' + assert isinstance(years,list), 'years must be a list' + assert isinstance(preferred_region, str), 'preferred_region must be a string' + assert isinstance(tree,(str,type(None))), 'tree must be a string' + assert isinstance(unscale,bool), 'unscale must be bool type' + assert isinstance(str_decode,bool), 'str_decode must be bool type' + assert isinstance(hsds,bool), 'hsds must be bool type' + + # check for multiple region selection + if isinstance(lat_lon[0], float): + region = region_selection(lat_lon, preferred_region) + else: + reglist = [] + for loc in lat_lon: + reglist.append(region_selection(loc)) + if reglist.count(reglist[0]) == len(lat_lon): + region = reglist[0] + else: + raise TypeError('Coordinates must be within the same region!') + + if time_interval == '1-hour': + wind_path = f'/nrel/wtk/'+region.lower()+'/'+region+'_*.h5' + elif time_interval == '5-minute': + wind_path = f'/nrel/wtk/'+region.lower()+'-5min/'+region+'_*.h5' + else: + raise TypeError(f"Invalid time_interval '{time_interval}', must be '1-hour' or '5-minute'") + windKwargs = {'tree':tree,'unscale':unscale,'str_decode':str_decode, 'hsds':hsds, + 'years':years} + data_list = [] + + with MultiYearWindX(wind_path, **windKwargs) as rex_wind: + if isinstance(parameter, list): + for p in parameter: + temp_data = rex_wind.get_lat_lon_df(p,lat_lon) + col = temp_data.columns[:] + for i,c in zip(range(len(col)),col): + temp = f'{p}_{i}' + temp_data = temp_data.rename(columns={c:temp}) + + data_list.append(temp_data) + data= pd.concat(data_list, axis=1) + + else: + data = rex_wind.get_lat_lon_df(parameter,lat_lon) + col = data.columns[:] + + for i,c in zip(range(len(col)),col): + temp = f'{parameter}_{i}' + data = data.rename(columns={c:temp}) + + meta = rex_wind.meta.loc[col,:] + meta = meta.reset_index(drop=True) + return data, meta