diff --git a/ecco_access/ecco_access.py b/ecco_access/ecco_access.py index f4ae96a..1fd3cd0 100644 --- a/ecco_access/ecco_access.py +++ b/ecco_access/ecco_access.py @@ -124,14 +124,14 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\ Returns ------- - download_files: dict, with keys: ShortNames and values: - URLs (if in 'query' mode), or paths of files that can be - passed directly to xarray (open_dataset or open_mfdataset). - Values are of type str if query finds only one granule/file - for that ShortName; of type list if query finds - multiple granules in the same dataset; - or of type fsspec.mapping.FSMap if mode = 's3_open_fsspec'. - Only returned if return_granules=True (default). + granule_files: dict with ShortNames as keys; values are URLs or S3 paths + (if in 'query' mode), or paths of files that can be + passed directly to xarray (open_dataset or open_mfdataset). + Values are of type str if query finds only one granule/file + for that ShortName; of type list if query finds + multiple granules in the same dataset; + or of type fsspec.mapping.FSMap if mode = 's3_open_fsspec'. + Only returned if return_granules=True (default). """ @@ -272,6 +272,7 @@ def shortnames_find(query_list,grid,time_res): ###================================================================================================================ + def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\ StartDate=None,EndDate=None,snapshot_interval=None,\ mode='download_ifspace',download_root_dir=None,**kwargs): @@ -325,10 +326,6 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\ and 'monthly' otherwise. mode: str, one of the following: - 'ls' or 'query': Query dataset ShortNames and variable names/ - descriptions only; no downloads. - 's3_ls' or 's3_query': Query dataset ShortNames and variable names/ - descriptions only; return paths on S3. 
'download': Download datasets using NASA Earthdata URLs 'download_ifspace': Check storage availability before downloading. Download only if storage footprint of downloads @@ -375,16 +372,11 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\ force_redownload: bool, if True, existing files will be redownloaded and replaced; if False (default), existing files will not be replaced. - return_granules: bool, if True (default), str or list of queried or - downloaded granules/files (including ones that - were already on disk and not replaced) is returned. - if False, the function returns nothing. - Returns ------- ds_out: xarray Dataset or dict of xarray Datasets (with ShortNames as keys), containing all of the accessed datasets. - Does not work with the query modes: 'ls','query','s3_ls','s3_query'. + This function does not work with the query modes: 'ls','query','s3_ls','s3_query'. """ pass diff --git a/ecco_access/ecco_download.py b/ecco_access/ecco_download.py index bd947cf..1fe6e65 100644 --- a/ecco_access/ecco_download.py +++ b/ecco_access/ecco_download.py @@ -383,8 +383,8 @@ def ecco_podaac_download(ShortName,StartDate,EndDate,snapshot_interval='monthly' ###================================================================================================================ -def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5,snapshot_interval=None,\ - download_root_dir=None,n_workers=6,force_redownload=False): +def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,snapshot_interval=None,\ + download_root_dir=None,max_avail_frac=0.5,n_workers=6,force_redownload=False): """ @@ -405,11 +405,6 @@ def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0 ECCOv4r4 date range is '1992-01-01' to '2017-12-31'. For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets within the specified date range. 
- - max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets. - If storing the datasets exceeds this fraction, an error is returned. - Valid range is [0,0.9]. If number provided is outside this range, it is replaced by the closer - endpoint of the range. snapshot_interval: ('monthly', 'daily', or None), if snapshot datasets are included in ShortNames, this determines whether snapshots are included for only the beginning/end of each month @@ -420,6 +415,11 @@ def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0 download_root_dir: str, defines parent directory to download files to. Files will be downloaded to directory download_root_dir/ShortName/. If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'. + + max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets. + If storing the datasets exceeds this fraction, an error is returned. + Valid range is [0,0.9]. If the number provided is outside this range, it is replaced by the closer + endpoint of the range. n_workers: int, number of workers to use in concurrent downloads. Benefits typically taper off above 5-6. Applies only if files are downloaded. 
diff --git a/ecco_access/ecco_s3_retrieve.py b/ecco_access/ecco_s3_retrieve.py index 2f177f8..4169e9d 100644 --- a/ecco_access/ecco_s3_retrieve.py +++ b/ecco_access/ecco_s3_retrieve.py @@ -527,8 +527,8 @@ def ecco_podaac_s3_get(ShortName,StartDate,EndDate,snapshot_interval='monthly',d ###================================================================================================================ -def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5,snapshot_interval=None,\ - download_root_dir=None,n_workers=6,force_redownload=False): +def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,snapshot_interval=None,\ + download_root_dir=None,max_avail_frac=0.5,n_workers=6,force_redownload=False): """ @@ -551,12 +551,7 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5 ECCOv4r4 date range is '1992-01-01' to '2017-12-31'. For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets within the specified date range. - - max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets. - This determines whether the dataset files are stored on the current instance, or opened on S3. - Valid range is [0,0.9]. If number provided is outside this range, it is replaced by the closer - endpoint of the range. - + snapshot_interval: ('monthly', 'daily', or None), if snapshot datasets are included in ShortNames, this determines whether snapshots are included for only the beginning/end of each month ('monthly'), or for every day ('daily'). @@ -567,6 +562,11 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5 Files will be downloaded to directory download_root_dir/ShortName/. If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'. + max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets. 
+ This determines whether the dataset files are stored on the current instance, or opened on S3. + Valid range is [0,0.9]. If the number provided is outside this range, it is replaced by the closer + endpoint of the range. + n_workers: int, number of workers to use in concurrent downloads. Benefits typically taper off above 5-6. Applies only if files are downloaded.