Skip to content

Commit 091b041

Browse files
authored
Merge pull request #5 from ssolson/NDBC_checks
Ensure only successful results returned
2 parents 5b7447a + 6278868 commit 091b041

File tree

2 files changed

+42
-19
lines changed

2 files changed

+42
-19
lines changed

mhkit/tests/test_wave.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -555,11 +555,28 @@ def test_ndbc_request_data_filenames_length(self):
555555

556556
def test_ndbc_request_data_empty_file(self):
557557
temp_stdout = StringIO()
558-
filename = pd.Series("42008h1984.txt.gz") # known empty file. If NDBC replaces, this test may fail.
558+
# Known empty file. If NDBC replaces it, this test may fail.
559+
filename = "42008h1984.txt.gz"
560+
buoy_id='42008'
561+
year = '1984'
559562
with contextlib.redirect_stdout(temp_stdout):
560-
wave.io.ndbc.request_data('stdmet', filename)
563+
wave.io.ndbc.request_data('stdmet', pd.Series(filename))
561564
output = temp_stdout.getvalue().strip()
562-
self.assertEqual(output, 'The NDBC file "' + filename.values +'" is empty or missing data. Please omit this file from your data request in the future.')
565+
msg = (f'The NDBC buoy {buoy_id} for year {year} with '
566+
f'filename {filename} is empty or missing '
567+
'data. Please omit this file from your data '
568+
'request in the future.')
569+
self.assertEqual(output, msg)
570+
571+
def test_ndbc_request_multiple_files_with_empty_file(self):
572+
temp_stdout = StringIO()
573+
# Known empty file. If NDBC replaces it, this test may fail.
574+
empty_file = '42008h1984.txt.gz'
575+
working_file = '46042h1996.txt.gz'
576+
filenames = pd.Series([empty_file, working_file])
577+
with contextlib.redirect_stdout(temp_stdout):
578+
ndbc_data =wave.io.ndbc.request_data('stdmet', filenames)
579+
self.assertEqual(1, len(ndbc_data))
563580

564581
def test_ndbc_dates_to_datetime(self):
565582
dt = wave.io.ndbc.dates_to_datetime('swden', self.swden)

mhkit/wave/io/ndbc.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict as _OrderedDict
2+
from collections import defaultdict as _defaultdict
23
from io import BytesIO
34
import pandas as pd
45
import numpy as np
@@ -236,16 +237,19 @@ def request_data(parameter, filenames, proxy=None):
236237
Data filenames on https://www.ndbc.noaa.gov/data/historical/{parameter}/
237238
238239
proxy: dict
239-
Proxy dict passed to python requests, (e.g. proxy_dict= {"http": 'http:wwwproxy.yourProxy:80/'})
240+
Proxy dict passed to python requests,
241+
(e.g. proxy={"http": 'http://wwwproxy.yourProxy:80/'})
240242
241243
Returns
242244
-------
243245
ndbc_data: dict
244246
Dictionary of DataFrames indexed by buoy and year.
245247
'''
246-
assert isinstance(filenames, (pd.Series,pd.DataFrame)), 'filenames must be of type pd.Series'
248+
assert isinstance(filenames, (pd.Series,pd.DataFrame)), (
249+
'filenames must be of type pd.Series')
247250
assert isinstance(parameter, str), 'parameter must be a string'
248-
assert isinstance(proxy, (dict, type(None))), 'If specified proxy must be a dict'
251+
assert isinstance(proxy, (dict, type(None))), ('If specified proxy '
252+
'must be a dict')
249253

250254
supported =_supported_params(parameter)
251255
if isinstance(filenames,pd.DataFrame):
@@ -254,11 +258,9 @@ def request_data(parameter, filenames, proxy=None):
254258
assert len(filenames)>0, "At least 1 filename must be passed"
255259
buoy_data = _parse_filenames(parameter, filenames)
256260
parameter_url = f'https://www.ndbc.noaa.gov/data/historical/{parameter}'
257-
ndbc_data = {}
261+
ndbc_data = _defaultdict(dict)
258262

259-
for buoy_id in buoy_data['id'].unique():
260-
ndbc_data_buoy={}
261-
263+
for buoy_id in buoy_data['id'].unique():
262264
buoy = buoy_data[buoy_data['id']== buoy_id]
263265
years = buoy.year
264266
filenames = buoy.filename
@@ -270,17 +272,21 @@ def request_data(parameter, filenames, proxy=None):
270272
response = requests.get(file_url, proxies=proxy)
271273
try:
272274
data = zlib.decompress(response.content, 16+zlib.MAX_WBITS)
273-
df = pd.read_csv(BytesIO(data), sep='\s+', low_memory=False)
274-
ndbc_data_buoy[year] = df
275+
df = pd.read_csv(BytesIO(data), sep='\s+', low_memory=False)
275276
except zlib.error:
276-
print('Issue decompressing the NDBC file "' + filename +'". Please re-run your code. It may take several tries to run sucessfully.')
277-
277+
msg = (f'Issue decompressing the NDBC file {filename} '
278+
f'(id: {buoy_id}, year: {year}). Please request '
279+
'the data again.')
280+
print(msg)
278281
except pandas.errors.EmptyDataError:
279-
print('The NDBC file "' + filename + '" is empty or missing data. Please omit this file from your data request in the future.')
280-
281-
282-
ndbc_data[buoy_id] = ndbc_data_buoy
283-
282+
msg = (f'The NDBC buoy {buoy_id} for year {year} with '
283+
f'filename {filename} is empty or missing '
284+
'data. Please omit this file from your data '
285+
'request in the future.')
286+
print(msg)
287+
else:
288+
ndbc_data[buoy_id][year] = df
289+
284290
if len(ndbc_data) == 1:
285291
ndbc_data = ndbc_data[buoy_id]
286292

0 commit comments

Comments
 (0)