1- import pandas as pd
2- import numpy as np
1+ import os
32import json
43import requests
4+ import shutil
5+ import pandas as pd
6+ from mhkit .utils .cache import handle_caching
7+
58
69def _read_usgs_json (text ):
7-
10+
811 data = pd .DataFrame ()
912 for i in range (len (text ['value' ]['timeSeries' ])):
1013 try :
11- site_name = text ['value' ]['timeSeries' ][i ]['variable' ]['variableDescription' ] #text['value']['timeSeries'][i]['sourceInfo']['siteName']
12- site_data = pd .DataFrame (text ['value' ]['timeSeries' ][i ]['values' ][0 ]['value' ])
14+ site_name = text ['value' ]['timeSeries' ][i ]['variable' ]['variableDescription' ]
15+ site_data = pd .DataFrame (
16+ text ['value' ]['timeSeries' ][i ]['values' ][0 ]['value' ])
1317 site_data .set_index ('dateTime' , drop = True , inplace = True )
1418 site_data .index = pd .to_datetime (site_data .index , utc = True )
1519 site_data .rename (columns = {'value' : site_name }, inplace = True )
@@ -19,8 +23,9 @@ def _read_usgs_json(text):
1923 data = data .combine_first (site_data )
2024 except :
2125 pass
22-
23- return data # we could also extract metadata and return that here
26+
27+ return data
28+
2429
def read_usgs_file(file_name):
    """
    Read a USGS JSON data file (from https://waterdata.usgs.gov/nwis).

    Parameters
    ----------
    file_name : str
        Name of USGS JSON data file

    Returns
    -------
    data : pandas DataFrame
        Data indexed by datetime with columns named according to the
        parameter's variable description
    """
    # Parse the on-disk JSON, then hand the decoded payload to the
    # shared JSON-to-DataFrame converter.
    with open(file_name) as handle:
        contents = json.load(handle)

    return _read_usgs_json(contents)
4651
4752
def request_usgs_data(
        station,
        parameter,
        start_date,
        end_date,
        data_type='Daily',
        proxy=None,
        write_json=None,
        clear_cache=False):
    """
    Loads USGS data directly from https://waterdata.usgs.gov/nwis using a
    GET request

    The request URL prints to the screen.

    Parameters
    ----------
    station : str
        USGS station number
    parameter : str
        USGS parameter ID
    start_date : str
        Start date (e.g. 'YYYY-MM-DD')
    end_date : str
        End date (e.g. 'YYYY-MM-DD')
    data_type : str
        'Daily' or 'Instantaneous'
    proxy : dict or None
        Proxy settings passed to the GET request,
        for example {"http": 'localhost:8080'}
    write_json : str or None
        Name of json file to write data
    clear_cache : bool
        If True, the cache for this specific request will be cleared.

    Returns
    -------
    data : pandas DataFrame
        Data indexed by datetime with columns named according to the parameter's
        variable description

    Raises
    ------
    ValueError
        If ``data_type`` is not 'Daily' or 'Instantaneous'.
    """
    # Validate with an explicit raise rather than `assert`, which is
    # stripped when Python runs with -O.
    if data_type not in ('Daily', 'Instantaneous'):
        raise ValueError(
            f"data_type must be 'Daily' or 'Instantaneous', got {data_type!r}")

    # Cache lives under the user's home directory so repeated identical
    # requests can be served without hitting the USGS service.
    cache_dir = os.path.join(os.path.expanduser("~"),
                             ".cache", "mhkit", "usgs")

    # Create a unique filename based on the function parameters
    hash_params = f"{station}_{parameter}_{start_date}_{end_date}_{data_type}"

    # Use handle_caching to manage cache
    cached_data, metadata, cache_filepath = handle_caching(
        hash_params, cache_dir, write_json, clear_cache)

    if cached_data is not None:
        # NOTE(review): on this early return write_json is only honored
        # if handle_caching wrote it above — confirm against its API.
        return cached_data

    # If no cached data, proceed with the API request.
    # ('statCd=00003' selects the daily-mean statistic.)
    if data_type == 'Daily':
        data_url = 'https://waterservices.usgs.gov/nwis/dv'
        api_query = '/?format=json&sites=' + station + \
                    '&startDT=' + start_date + '&endDT=' + end_date + \
                    '&statCd=00003' + \
                    '&parameterCd=' + parameter + '&siteStatus=all'
    else:
        data_url = 'https://waterservices.usgs.gov/nwis/iv'
        api_query = '/?format=json&sites=' + station + \
                    '&startDT=' + start_date + '&endDT=' + end_date + \
                    '&parameterCd=' + parameter + '&siteStatus=all'

    print('Data request URL: ', data_url + api_query)

    response = requests.get(url=data_url + api_query, proxies=proxy)
    text = json.loads(response.text)

    data = _read_usgs_json(text)

    # After making the API request and processing the response, write the
    # response to a cache file
    handle_caching(hash_params, cache_dir, data=data,
                   clear_cache_file=clear_cache)

    if write_json:
        shutil.copy(cache_filepath, write_json)

    return data
0 commit comments