Skip to content

Commit 7f75752

Browse files
committed
renamed ecco_podaac_access_to_xrdataset function to ecco_podaac_to_xrdataset; fixed bug in monthly snapshot dates
1 parent 2cae0d4 commit 7f75752

File tree

4 files changed

+150
-72
lines changed

4 files changed

+150
-72
lines changed

ecco_access/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .ecco_access import ecco_podaac_access
2-
from .ecco_access import ecco_podaac_access_to_xrdataset
2+
from .ecco_access import ecco_podaac_to_xrdataset
33

44
from .ecco_download import ecco_podaac_query
55
from .ecco_download import ecco_podaac_download

ecco_access/ecco_access.py

Lines changed: 65 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818

1919
def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
20-
StartDate=None,EndDate=None,\
20+
StartDate=None,EndDate=None,snapshot_interval=None,\
2121
mode='download_ifspace',download_root_dir=None,**kwargs):
2222
"""
2323
@@ -60,6 +60,12 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
6060
For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets
6161
within the specified date range.
6262
63+
snapshot_interval: ('monthly', 'daily', or None). If snapshot datasets are included in ShortNames,
64+
this determines whether snapshots are included only at the beginning/end of each month
65+
('monthly') or for every day ('daily').
66+
If None or not specified, defaults to 'daily' if any daily mean ShortNames are included
67+
and 'monthly' otherwise.
68+
6369
mode: str, one of the following:
6470
'ls' or 'query': Query dataset ShortNames and variable names/
6571
descriptions only; no downloads.
@@ -187,11 +193,11 @@ def shortnames_find(query_list,grid,time_res):
187193
kwargs['max_avail_frac'] = 0.5
188194
if mode == 'download_ifspace':
189195
granule_files = ecco_podaac_download_diskaware(\
190-
shortnames,StartDate,EndDate,\
196+
shortnames,StartDate,EndDate,snapshot_interval,\
191197
download_root_dir=download_root_dir,**kwargs)
192198
elif mode == 's3_get_ifspace':
193199
granule_files = ecco_podaac_s3_get_diskaware(\
194-
shortnames,StartDate,EndDate,\
200+
shortnames,StartDate,EndDate,snapshot_interval,\
195201
download_root_dir=download_root_dir,**kwargs)
196202
else:
197203
raise ValueError('Invalid mode specified; please specify one of the following:'\
@@ -200,38 +206,48 @@ def shortnames_find(query_list,grid,time_res):
200206
if 'max_avail_frac' in kwargs.keys():
201207
del kwargs['max_avail_frac']
202208
granule_files = {}
209+
210+
# determine value of snapshot_interval if None or not specified
211+
if snapshot_interval == None:
212+
snapshot_interval = 'monthly'
213+
for curr_shortname in shortnames:
214+
if 'DAILY' in curr_shortname:
215+
snapshot_interval = 'daily'
216+
break
217+
203218
for shortname in shortnames:
219+
204220
if mode in ['ls','query']:
205-
urls,sizes = ecco_podaac_query(shortname,StartDate,EndDate)
221+
urls,sizes = ecco_podaac_query(shortname,StartDate,EndDate,snapshot_interval)
206222
granule_files[shortname] = urls
207223
elif mode in ['s3_ls','s3_query']:
208224
granule_files[shortname] = ecco_podaac_s3_query(\
209-
shortname,StartDate,EndDate)
225+
shortname,StartDate,EndDate,snapshot_interval)
210226
elif mode == 'download':
211227
kwargs['return_downloaded_files'] = True
212228
granule_files[shortname] = ecco_podaac_download(\
213-
shortname,StartDate,EndDate,\
229+
shortname,StartDate,EndDate,snapshot_interval,\
214230
download_root_dir=download_root_dir,\
215231
**kwargs)
216232
elif mode == 'download_subset':
217233
if 'n_workers' not in kwargs.keys():
218234
kwargs['n_workers'] = 4
219235
kwargs['return_downloaded_files'] = True
220236
granule_files[shortname] = ecco_podaac_download_subset(\
221-
shortname,StartDate,EndDate,\
237+
shortname,StartDate,EndDate,snapshot_interval,\
222238
download_root_dir=download_root_dir,\
223239
**kwargs)
224240
elif mode == 's3_open':
225241
granule_files[shortname] = ecco_podaac_s3_open(\
226-
shortname,StartDate,EndDate)
242+
shortname,StartDate,EndDate,snapshot_interval)
227243
elif mode == 's3_open_fsspec':
228244
# granule_files will consist of mapper objects rather than URL/path or file lists
229245
granule_files[shortname] = ecco_podaac_s3_open_fsspec(\
230246
shortname,**kwargs)
231247
elif mode == 's3_get':
232248
kwargs['return_downloaded_files'] = True
233249
granule_files[shortname] = ecco_podaac_s3_get(\
234-
shortname,StartDate,EndDate,\
250+
shortname,StartDate,EndDate,snapshot_interval,\
235251
download_root_dir=download_root_dir,\
236252
**kwargs)
237253
else:
@@ -256,9 +272,9 @@ def shortnames_find(query_list,grid,time_res):
256272
###================================================================================================================
257273

258274

259-
def ecco_podaac_access_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
260-
StartDate=None,EndDate=None,\
261-
mode='download_ifspace',download_root_dir=None,**kwargs):
275+
def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
276+
StartDate=None,EndDate=None,snapshot_interval=None,\
277+
mode='download_ifspace',download_root_dir=None,**kwargs):
262278
"""
263279
264280
This function queries and accesses ECCO datasets from PO.DAAC. The core query and download functions are adapted from Jupyter notebooks
@@ -302,6 +318,12 @@ def ecco_podaac_access_to_xrdataset(query,version='v4r4',grid=None,time_res='all
302318
For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets
303319
within the specified date range.
304320
321+
snapshot_interval: ('monthly', 'daily', or None). If snapshot datasets are included in ShortNames,
322+
this determines whether snapshots are included only at the beginning/end of each month
323+
('monthly') or for every day ('daily').
324+
If None or not specified, defaults to 'daily' if any daily mean ShortNames are included
325+
and 'monthly' otherwise.
326+
305327
mode: str, one of the following:
306328
'ls' or 'query': Query dataset ShortNames and variable names/
307329
descriptions only; no downloads.
@@ -367,39 +389,61 @@ def ecco_podaac_access_to_xrdataset(query,version='v4r4',grid=None,time_res='all
367389

368390
pass
369391

392+
370393
import numpy as np
371394
import xarray as xr
372395

373396

374397
# raise error if mode is ls/query only
375398
if mode in ['ls','query','s3_ls','s3_query']:
376-
raise ValueError("ecco_podaac_access_to_xrdataset does not work with 'ls'/'query' modes. \n"\
399+
raise ValueError("ecco_podaac_to_xrdataset does not work with 'ls'/'query' modes. \n"\
377400
+"Please use ecco_podaac_access with these modes.")
378401

379402
return -1
380403

381404
# submit access query (and download if needed)
382405
access_output = ecco_podaac_access(query,version,grid,time_res,\
383-
StartDate,EndDate,\
406+
StartDate,EndDate,snapshot_interval,\
384407
mode,download_root_dir,**kwargs)
385408

409+
# determine value of snapshot_interval if None or not specified
410+
if snapshot_interval == None:
411+
snapshot_interval = 'monthly'
412+
for curr_shortname in access_output.keys():
413+
if 'DAILY' in curr_shortname:
414+
snapshot_interval = 'daily'
415+
break
416+
386417
# open xarray datasets
387418
ds_out = {}
388419
for shortname,access_out in access_output.items():
389420
if mode == 's3_open_fsspec':
390-
ds_out[shortname] = xr.open_dataset(access_out,engine='zarr',consolidated=False)
391-
if 'time' in ds_out[shortname].dims:
421+
curr_ds = xr.open_dataset(access_out,engine='zarr',consolidated=False)
422+
if 'time' in curr_ds.dims:
392423
# isolate time range specified
393424
startdate,enddate = date_adjustment(shortname,\
394425
StartDate,EndDate,CMR_query=False)
395-
time_values = ds_out[shortname].time.values.astype('datetime64[D]')
426+
time_values = curr_ds.time.values.astype('datetime64[D]')
396427
in_time_range = np.logical_and(time_values >= startdate,\
397428
time_values <= enddate).nonzero()[0]
398-
ds_out[shortname] = ds_out[shortname].isel(time=in_time_range)
429+
curr_ds = curr_ds.isel(time=in_time_range)
430+
if (('SNAPSHOT' in shortname) and (snapshot_interval == 'monthly')):
431+
month_bounds_list = np.arange(np.datetime64('1992-01','M'),\
432+
np.datetime64('2040-01','M'),\
433+
np.timedelta64(1,'M'))\
434+
.astype('datetime64[D]')
435+
time_values = curr_ds.time.values.astype('datetime64[D]')
436+
time_subind = list(np.arange(0,len(time_values)).astype('int64'))
437+
for count,time_val in enumerate(time_values):
438+
if time_val not in month_bounds_list:
439+
time_subind.remove(count)
440+
curr_ds = curr_ds.isel(time=time_subind)
399441
else:
400-
ds_out[shortname] = xr.open_mfdataset(access_out,\
401-
compat='override',data_vars='minimal',coords='minimal',\
402-
parallel=True)
442+
curr_ds = xr.open_mfdataset(access_out,\
443+
compat='override',data_vars='minimal',coords='minimal',\
444+
parallel=True)
445+
ds_out[shortname] = curr_ds
446+
403447

404448
# if only one ShortName is involved, then extract dataset from dictionary
405449
if len(ds_out) == 1:

0 commit comments

Comments
 (0)