Merge pull request #5 from ssolson/NDBC_checks

rpauly18 · web-flow · commit 091b0413012a · 2020-09-08T07:35:57.000-06:00
Ensure only successful results returned
diff --git a/mhkit/tests/test_wave.py b/mhkit/tests/test_wave.py
@@ -555,11 +555,28 @@ def test_ndbc_request_data_filenames_length(self):
 
     def test_ndbc_request_data_empty_file(self):
         temp_stdout = StringIO()
-        filename = pd.Series("42008h1984.txt.gz")  # known empty file. If NDBC replaces, this test may fail. 
+        # known empty file. If NDBC replaces, this test may fail. 
+        filename = "42008h1984.txt.gz"  
+        buoy_id='42008'
+        year = '1984'
         with contextlib.redirect_stdout(temp_stdout):
-            wave.io.ndbc.request_data('stdmet', filename)
+            wave.io.ndbc.request_data('stdmet', pd.Series(filename))
         output = temp_stdout.getvalue().strip()
-        self.assertEqual(output, 'The NDBC file "' + filename.values +'" is empty or missing data. Please omit this file from your data request in the future.')
+        msg = (f'The NDBC buoy {buoy_id} for year {year} with ' 
+               f'filename {filename} is empty or missing '     
+                'data. Please omit this file from your data '   
+                'request in the future.')
+        self.assertEqual(output, msg)
+
+    def test_ndbc_request_multiple_files_with_empty_file(self):
+        temp_stdout = StringIO()
+        # known empty file. If NDBC replaces, this test may fail. 
+        empty_file = '42008h1984.txt.gz'
+        working_file = '46042h1996.txt.gz'
+        filenames = pd.Series([empty_file, working_file])
+        with contextlib.redirect_stdout(temp_stdout):
+            ndbc_data =wave.io.ndbc.request_data('stdmet', filenames)        
+        self.assertEqual(1, len(ndbc_data))              
         
     def test_ndbc_dates_to_datetime(self):
         dt = wave.io.ndbc.dates_to_datetime('swden', self.swden)
diff --git a/mhkit/wave/io/ndbc.py b/mhkit/wave/io/ndbc.py
@@ -1,4 +1,5 @@
 from collections import OrderedDict as _OrderedDict
+from collections import defaultdict as _defaultdict
 from io import BytesIO
 import pandas as pd
 import numpy as np
@@ -236,16 +237,19 @@ def request_data(parameter, filenames, proxy=None):
 	    Data filenames on https://www.ndbc.noaa.gov/data/historical/{parameter}/
     
     proxy: dict
-	    Proxy dict passed to python requests, (e.g. proxy_dict= {"http": 'http:wwwproxy.yourProxy:80/'})  
+	    Proxy dict passed to python requests, 
+        (e.g. proxy_dict= {"http": 'http:wwwproxy.yourProxy:80/'})  
         
     Returns
     -------
     ndbc_data: dict
         Dictionary of DataFrames indexed by buoy and year.
     '''
-    assert isinstance(filenames, (pd.Series,pd.DataFrame)), 'filenames must be of type pd.Series' 
+    assert isinstance(filenames, (pd.Series,pd.DataFrame)), (
+        'filenames must be of type pd.Series') 
     assert isinstance(parameter, str), 'parameter must be a string'
-    assert isinstance(proxy, (dict, type(None))), 'If specified proxy must be a dict' 
+    assert isinstance(proxy, (dict, type(None))), ('If specified proxy' 
+      'must be a dict')
     
     supported =_supported_params(parameter)
     if isinstance(filenames,pd.DataFrame):
@@ -254,11 +258,9 @@ def request_data(parameter, filenames, proxy=None):
     assert len(filenames)>0, "At least 1 filename must be passed"      
     buoy_data = _parse_filenames(parameter, filenames)
     parameter_url = f'https://www.ndbc.noaa.gov/data/historical/{parameter}'
-    ndbc_data = {}    
+    ndbc_data = _defaultdict(dict)    
     
-    for buoy_id in buoy_data['id'].unique():
-        ndbc_data_buoy={}
-        
+    for buoy_id in buoy_data['id'].unique():        
         buoy = buoy_data[buoy_data['id']== buoy_id]
         years = buoy.year
         filenames = buoy.filename
@@ -270,17 +272,21 @@ def request_data(parameter, filenames, proxy=None):
                 response = requests.get(file_url, proxies=proxy)
             try: 
                 data = zlib.decompress(response.content, 16+zlib.MAX_WBITS)
-                df = pd.read_csv(BytesIO(data), sep='\s+', low_memory=False)
-                ndbc_data_buoy[year] = df
+                df = pd.read_csv(BytesIO(data), sep='\s+', low_memory=False)                
             except zlib.error: 
-                print('Issue decompressing the NDBC file "' + filename +'". Please re-run your code. It may take several tries to run sucessfully.')
-
+                msg = (f'Issue decompressing the NDBC file {filename}'  
+                       f'(id: {buoy_id}, year: {year}). Please request ' 
+                       'the data again.')
+                print(msg)
             except pandas.errors.EmptyDataError: 
-                print('The NDBC file "' + filename + '" is empty or missing data. Please omit this file from your data request in the future.')
-                
-
-        ndbc_data[buoy_id] = ndbc_data_buoy
-        
+                msg = (f'The NDBC buoy {buoy_id} for year {year} with ' 
+                       f'filename {filename} is empty or missing '     
+                        'data. Please omit this file from your data '   
+                        'request in the future.')
+                print(msg)
+            else:
+                ndbc_data[buoy_id][year] = df    
+                             
     if len(ndbc_data) == 1:
         ndbc_data = ndbc_data[buoy_id]