1717
1818
1919def ecco_podaac_access (query ,version = 'v4r4' ,grid = None ,time_res = 'all' ,\
20- StartDate = None ,EndDate = None ,\
20+ StartDate = None ,EndDate = None ,snapshot_interval = None , \
2121 mode = 'download_ifspace' ,download_root_dir = None ,** kwargs ):
2222 """
2323
@@ -60,6 +60,12 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
6060 For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets
6161 within the specified date range.
6262
63+ snapshot_interval: ('monthly', 'daily', or None). If snapshot datasets are included in ShortNames,
64+ this determines whether snapshots are included only at the beginning/end of each month
65+ ('monthly') or for every day ('daily').
66+ If None or not specified, defaults to 'daily' if any daily mean ShortNames are included,
67+ and to 'monthly' otherwise.
68+
6369 mode: str, one of the following:
6470 'ls' or 'query': Query dataset ShortNames and variable names/
6571 descriptions only; no downloads.
@@ -187,11 +193,11 @@ def shortnames_find(query_list,grid,time_res):
187193 kwargs ['max_avail_frac' ] = 0.5
188194 if mode == 'download_ifspace' :
189195 granule_files = ecco_podaac_download_diskaware (\
190- shortnames ,StartDate ,EndDate ,\
196+ shortnames ,StartDate ,EndDate ,snapshot_interval , \
191197 download_root_dir = download_root_dir ,** kwargs )
192198 elif mode == 's3_get_ifspace' :
193199 granule_files = ecco_podaac_s3_get_diskaware (\
194- shortnames ,StartDate ,EndDate ,\
200+ shortnames ,StartDate ,EndDate ,snapshot_interval , \
195201 download_root_dir = download_root_dir ,** kwargs )
196202 else :
197203 raise ValueError ('Invalid mode specified; please specify one of the following:' \
@@ -200,38 +206,48 @@ def shortnames_find(query_list,grid,time_res):
200206 if 'max_avail_frac' in kwargs .keys ():
201207 del kwargs ['max_avail_frac' ]
202208 granule_files = {}
209+
210+ # determine value of snapshot_interval if None or not specified
211+ if snapshot_interval == None :
212+ snapshot_interval = 'monthly'
213+ for curr_shortname in shortnames :
214+ if 'DAILY' in curr_shortname :
215+ snapshot_interval = 'daily'
216+ break
217+
203218 for shortname in shortnames :
219+
204220 if mode in ['ls' ,'query' ]:
205- urls ,sizes = ecco_podaac_query (shortname ,StartDate ,EndDate )
221+ urls ,sizes = ecco_podaac_query (shortname ,StartDate ,EndDate , snapshot_interval )
206222 granule_files [shortname ] = urls
207223 elif mode in ['s3_ls' ,'s3_query' ]:
208224 granule_files [shortname ] = ecco_podaac_s3_query (\
209- shortname ,StartDate ,EndDate )
225+ shortname ,StartDate ,EndDate , snapshot_interval )
210226 elif mode == 'download' :
211227 kwargs ['return_downloaded_files' ] = True
212228 granule_files [shortname ] = ecco_podaac_download (\
213- shortname ,StartDate ,EndDate ,\
229+ shortname ,StartDate ,EndDate ,snapshot_interval , \
214230 download_root_dir = download_root_dir ,\
215231 ** kwargs )
216232 elif mode == 'download_subset' :
217233 if 'n_workers' not in kwargs .keys ():
218234 kwargs ['n_workers' ] = 4
219235 kwargs ['return_downloaded_files' ] = True
220236 granule_files [shortname ] = ecco_podaac_download_subset (\
221- shortname ,StartDate ,EndDate ,\
237+ shortname ,StartDate ,EndDate ,snapshot_interval , \
222238 download_root_dir = download_root_dir ,\
223239 ** kwargs )
224240 elif mode == 's3_open' :
225241 granule_files [shortname ] = ecco_podaac_s3_open (\
226- shortname ,StartDate ,EndDate )
242+ shortname ,StartDate ,EndDate , snapshot_interval )
227243 elif mode == 's3_open_fsspec' :
228244 # granule_files will consist of mapper objects rather than URL/path or file lists
229245 granule_files [shortname ] = ecco_podaac_s3_open_fsspec (\
230246 shortname ,** kwargs )
231247 elif mode == 's3_get' :
232248 kwargs ['return_downloaded_files' ] = True
233249 granule_files [shortname ] = ecco_podaac_s3_get (\
234- shortname ,StartDate ,EndDate ,\
250+ shortname ,StartDate ,EndDate ,snapshot_interval , \
235251 download_root_dir = download_root_dir ,\
236252 ** kwargs )
237253 else :
@@ -256,9 +272,9 @@ def shortnames_find(query_list,grid,time_res):
256272###================================================================================================================
257273
258274
259- def ecco_podaac_access_to_xrdataset (query ,version = 'v4r4' ,grid = None ,time_res = 'all' ,\
260- StartDate = None ,EndDate = None ,\
261- mode = 'download_ifspace' ,download_root_dir = None ,** kwargs ):
275+ def ecco_podaac_to_xrdataset (query ,version = 'v4r4' ,grid = None ,time_res = 'all' ,\
276+ StartDate = None ,EndDate = None , snapshot_interval = None ,\
277+ mode = 'download_ifspace' ,download_root_dir = None ,** kwargs ):
262278 """
263279
264280 This function queries and accesses ECCO datasets from PO.DAAC. The core query and download functions are adapted from Jupyter notebooks
@@ -302,6 +318,12 @@ def ecco_podaac_access_to_xrdataset(query,version='v4r4',grid=None,time_res='all
302318 For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets
303319 within the specified date range.
304320
321+ snapshot_interval: ('monthly', 'daily', or None). If snapshot datasets are included in ShortNames,
322+ this determines whether snapshots are included only at the beginning/end of each month
323+ ('monthly') or for every day ('daily').
324+ If None or not specified, defaults to 'daily' if any daily mean ShortNames are included,
325+ and to 'monthly' otherwise.
326+
305327 mode: str, one of the following:
306328 'ls' or 'query': Query dataset ShortNames and variable names/
307329 descriptions only; no downloads.
@@ -367,39 +389,61 @@ def ecco_podaac_access_to_xrdataset(query,version='v4r4',grid=None,time_res='all
367389
368390 pass
369391
392+
370393 import numpy as np
371394 import xarray as xr
372395
373396
374397 # raise error if mode is ls/query only
375398 if mode in ['ls' ,'query' ,'s3_ls' ,'s3_query' ]:
376- raise ValueError ("ecco_podaac_access_to_xrdataset does not work with 'ls'/'query' modes. \n " \
399+ raise ValueError ("ecco_podaac_to_xrdataset does not work with 'ls'/'query' modes. \n " \
377400 + "Please use ecco_podaac_access with these modes." )
378401
379402 return - 1
380403
381404 # submit access query (and download if needed)
382405 access_output = ecco_podaac_access (query ,version ,grid ,time_res ,\
383- StartDate ,EndDate ,\
406+ StartDate ,EndDate ,snapshot_interval , \
384407 mode ,download_root_dir ,** kwargs )
385408
409+ # determine value of snapshot_interval if None or not specified
410+ if snapshot_interval == None :
411+ snapshot_interval = 'monthly'
412+ for curr_shortname in access_output .keys ():
413+ if 'DAILY' in curr_shortname :
414+ snapshot_interval = 'daily'
415+ break
416+
386417 # open xarray datasets
387418 ds_out = {}
388419 for shortname ,access_out in access_output .items ():
389420 if mode == 's3_open_fsspec' :
390- ds_out [ shortname ] = xr .open_dataset (access_out ,engine = 'zarr' ,consolidated = False )
391- if 'time' in ds_out [ shortname ] .dims :
421+ curr_ds = xr .open_dataset (access_out ,engine = 'zarr' ,consolidated = False )
422+ if 'time' in curr_ds .dims :
392423 # isolate time range specified
393424 startdate ,enddate = date_adjustment (shortname ,\
394425 StartDate ,EndDate ,CMR_query = False )
395- time_values = ds_out [ shortname ] .time .values .astype ('datetime64[D]' )
426+ time_values = curr_ds .time .values .astype ('datetime64[D]' )
396427 in_time_range = np .logical_and (time_values >= startdate ,\
397428 time_values <= enddate ).nonzero ()[0 ]
398- ds_out [shortname ] = ds_out [shortname ].isel (time = in_time_range )
429+ curr_ds = curr_ds .isel (time = in_time_range )
430+ if (('SNAPSHOT' in shortname ) and (snapshot_interval == 'monthly' )):
431+ month_bounds_list = np .arange (np .datetime64 ('1992-01' ,'M' ),\
432+ np .datetime64 ('2040-01' ,'M' ),\
433+ np .timedelta64 (1 ,'M' ))\
434+ .astype ('datetime64[D]' )
435+ time_values = curr_ds .time .values .astype ('datetime64[D]' )
436+ time_subind = list (np .arange (0 ,len (time_values )).astype ('int64' ))
437+ for count ,time_val in enumerate (time_values ):
438+ if time_val not in month_bounds_list :
439+ time_subind .remove (count )
440+ curr_ds = curr_ds .isel (time = time_subind )
399441 else :
400- ds_out [shortname ] = xr .open_mfdataset (access_out ,\
401- compat = 'override' ,data_vars = 'minimal' ,coords = 'minimal' ,\
402- parallel = True )
442+ curr_ds = xr .open_mfdataset (access_out ,\
443+ compat = 'override' ,data_vars = 'minimal' ,coords = 'minimal' ,\
444+ parallel = True )
445+ ds_out [shortname ] = curr_ds
446+
403447
404448 # if only one ShortName is involved, then extract dataset from dictionary
405449 if len (ds_out ) == 1 :
0 commit comments