
Commit 8db27f5

Improve Testing Time (#241)
Use caching by default & reduce testing time
1 parent 1284a1a · commit 8db27f5

File tree

21 files changed: +1939 -1047 lines changed


.github/workflows/main.yml

Lines changed: 78 additions & 23 deletions
@@ -1,4 +1,4 @@
-name: Py 3.7 3.8, 3.9 | Windows Mac Linux
+name: Py 3.8, 3.9 | Windows Mac Linux
 
 on:
   push:
@@ -9,16 +9,63 @@ on:
     branches:
       - master
       - develop
-
 jobs:
+  prepare-cache:
+    runs-on: ubuntu-latest
+    env:
+      PYTHON_VER: 3.9
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Setup Conda
+        uses: s-weigand/setup-conda@v1
+        with:
+          activate-conda: false
+          conda-channels: conda-forge
+
+      - name: Python setup
+        shell: bash -l {0}
+        run: |
+          conda create --name TEST python=${{ env.PYTHON_VER }} numpy cython pip hdf5 libnetcdf cftime netcdf4 --strict-channel-priority
+          source activate TEST
+          export PATH="${CONDA_PREFIX}/bin:${CONDA_PREFIX}/Library/bin:$PATH" # so setup.py finds nc-config
+          pip install -e . --no-deps --force-reinstall
+
+      - name: Install dependencies
+        shell: bash -l {0}
+        run: |
+          source activate TEST
+          python -m pip install --upgrade pip wheel
+          pip install coverage pytest coveralls .
+
+      - name: Prepare data
+        run: |
+          # pytest tests/test_specific_file.py::TestClass::test_function
+          source activate TEST
+          pytest mhkit/tests/river/test_io.py
+          pytest mhkit/tests/tidal/test_io.py
+          pytest mhkit/tests/wave/io/test_cdip.py
+          pytest mhkit/tests/wave/io/hindcast/test_hindcast.py
+          pytest mhkit/tests/wave/io/hindcast/test_wind_toolkit.py
+
+      - name: Upload data as artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: data
+          path: ~/.cache/mhkit
+
   conda-build:
     name: conda-${{ matrix.os }}/${{ matrix.python-version }}
+    needs: [prepare-cache]
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
         os: ["windows-latest", "ubuntu-latest", "macos-latest"]
-        python-version: [3.7, 3.8, 3.9]
+        python-version: [3.8, 3.9]
+    env:
+      PYTHON_VER: ${{ matrix.python-version }}
 
     steps:
       - uses: actions/checkout@v2
@@ -32,20 +79,23 @@ jobs:
       - name: Python ${{ matrix.python-version }}
         shell: bash -l {0}
         run: |
-          conda create --name TEST python=${{ matrix.python-version }} numpy cython pip pytest hdf5 libnetcdf cftime netcdf4 --strict-channel-priority
+          conda create --name TEST python=${PYTHON_VER} numpy cython pip hdf5 libnetcdf cftime netcdf4 --strict-channel-priority
           source activate TEST
           export PATH="${CONDA_PREFIX}/bin:${CONDA_PREFIX}/Library/bin:$PATH" # so setup.py finds nc-config
           pip install -e . --no-deps --force-reinstall
 
-      - name: Tests
+      - name: Install dependencies
         shell: bash -l {0}
         run: |
           source activate TEST
           python -m pip install --upgrade pip wheel
-          pip install coverage
-          pip install pytest
-          pip install coveralls
-          pip install .
+          pip install coverage pytest coveralls .
+
+      - name: Download data from artifact
+        uses: actions/download-artifact@v2
+        with:
+          name: data
+          path: ~/.cache/mhkit
 
       - name: Run pytest
         shell: bash -l {0}
@@ -65,6 +115,7 @@ jobs:
 
   pip-build:
     name: pip-${{ matrix.os }}/${{ matrix.python-version }}
+    needs: [prepare-cache]
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -83,13 +134,16 @@ jobs:
       - name: Set up Git repository
        uses: actions/checkout@v2
 
+      - name: Download data from artifact
+        uses: actions/download-artifact@v2
+        with:
+          name: data
+          path: ~/.cache/mhkit
+
       - name: Update and install packages
         run: |
           python -m pip install --upgrade pip wheel
-          pip install coverage
-          pip install pytest
-          pip install coveralls
-          pip install .
+          pip install coverage pytest coveralls .
 
       - name: Run pytest
         run: |
@@ -104,13 +158,14 @@ jobs:
 
   hindcast-calls:
     name: hindcast-${{ matrix.os }}/${{ matrix.python-version }}
+    needs: [prepare-cache]
     runs-on: ${{ matrix.os }}
     strategy:
       max-parallel: 1
       fail-fast: false
       matrix:
-        os: ["windows-latest", "macos-latest"]
-        python-version: [3.9]
+        os: ["windows-latest", "macos-latest", "ubuntu-latest"]
+        python-version: [3.8, 3.9]
 
     steps:
       - uses: actions/checkout@v2
@@ -129,18 +184,18 @@ jobs:
           export PATH="${CONDA_PREFIX}/bin:${CONDA_PREFIX}/Library/bin:$PATH" # so setup.py finds nc-config
           pip install -e . --no-deps --force-reinstall
 
-      - name: Install MHKiT
-        shell: bash -l {0}
-        run: |
-          source activate TEST
-          python -m pip install --upgrade pip wheel
-          pip install coveralls
-          pip install .
+      - name: Download data from artifact
+        uses: actions/download-artifact@v2
+        with:
+          name: data
+          path: ~/.cache/mhkit
 
-      - name: Run pytest
+      - name: Install MHKiT and run pytest
         shell: bash -l {0}
         run: |
           source activate TEST
+          python -m pip install --upgrade pip wheel
+          pip install coveralls .
           coverage run --rcfile=.github/workflows/.coveragehindcastrc -m pytest -c .github/workflows/pytest-hindcast.ini
 
       - name: Upload coverage data to coveralls.io
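
Net effect of the workflow change: the new prepare-cache job populates ~/.cache/mhkit once by running the network-bound IO tests on ubuntu-latest, then uploads that directory as an artifact named data. Each downstream job (conda-build, pip-build, hindcast-calls) declares needs: [prepare-cache] and restores the artifact to the same path before running pytest, so the matrix jobs read local files instead of repeating slow remote downloads. Below is a minimal sanity-check sketch one could run after a download step; the script is not part of this commit, and any cache layout beyond the usgs/ subdirectory introduced below is an assumption:

# inspect_cache.py -- illustrative only, not part of this commit.
# Walk the restored cache directory and print its contents so a CI log
# shows whether the artifact download actually populated ~/.cache/mhkit.
import os

CACHE_ROOT = os.path.join(os.path.expanduser("~"), ".cache", "mhkit")

total_bytes = 0
for dirpath, _dirnames, filenames in os.walk(CACHE_ROOT):
    for name in filenames:
        path = os.path.join(dirpath, name)
        total_bytes += os.path.getsize(path)
        print(path)

print(f"{total_bytes / 1e6:.1f} MB cached under {CACHE_ROOT}")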

mhkit/river/io/usgs.py

Lines changed: 66 additions & 32 deletions
@@ -1,15 +1,19 @@
-import pandas as pd
-import numpy as np
+import os
 import json
 import requests
+import shutil
+import pandas as pd
+from mhkit.utils.cache import handle_caching
+
 
 def _read_usgs_json(text):
-
+
     data = pd.DataFrame()
     for i in range(len(text['value']['timeSeries'])):
         try:
-            site_name = text['value']['timeSeries'][i]['variable']['variableDescription'] #text['value']['timeSeries'][i]['sourceInfo']['siteName']
-            site_data = pd.DataFrame(text['value']['timeSeries'][i]['values'][0]['value'])
+            site_name = text['value']['timeSeries'][i]['variable']['variableDescription']
+            site_data = pd.DataFrame(
+                text['value']['timeSeries'][i]['values'][0]['value'])
             site_data.set_index('dateTime', drop=True, inplace=True)
             site_data.index = pd.to_datetime(site_data.index, utc=True)
             site_data.rename(columns={'value': site_name}, inplace=True)
@@ -19,8 +23,9 @@ def _read_usgs_json(text):
             data = data.combine_first(site_data)
         except:
             pass
-
-    return data # we could also extract metadata and return that here
+
+    return data
+
 
 def read_usgs_file(file_name):
     """
@@ -30,7 +35,7 @@ def read_usgs_file(file_name):
     ----------
     file_name : str
         Name of USGS JSON data file
-
+
     Returns
     -------
     data : pandas DataFrame
@@ -39,18 +44,25 @@
     """
     with open(file_name) as json_file:
         text = json.load(json_file)
-
+
     data = _read_usgs_json(text)
-
-    return data
+
+    return data
 
 
-def request_usgs_data(station, parameter, start_date, end_date,
-                      data_type='Daily', proxy=None, write_json=None):
+def request_usgs_data(
+        station,
+        parameter,
+        start_date,
+        end_date,
+        data_type='Daily',
+        proxy=None,
+        write_json=None,
+        clear_cache=False):
     """
     Loads USGS data directly from https://waterdata.usgs.gov/nwis using a
     GET request
-
+
     The request URL prints to the screen.
 
     Parameters
@@ -71,36 +83,58 @@
         for example {"http": 'localhost:8080'}
     write_json : str or None
         Name of json file to write data
-
+    clear_cache : bool
+        If True, the cache for this specific request will be cleared.
+
     Returns
     -------
     data : pandas DataFrame
         Data indexed by datetime with columns named according to the parameter's
         variable description
    """
-    assert data_type in ['Daily', 'Instantaneous'], 'data_type must be Daily or Instantaneous'
-
+    assert data_type in [
+        'Daily', 'Instantaneous'], 'data_type must be Daily or Instantaneous'
+
+    # Define the path to the cache directory
+    cache_dir = os.path.join(os.path.expanduser("~"),
+                             ".cache", "mhkit", "usgs")
+
+    # Create a unique filename based on the function parameters
+    hash_params = f"{station}_{parameter}_{start_date}_{end_date}_{data_type}"
+
+    # Use handle_caching to manage cache
+    cached_data, metadata, cache_filepath = handle_caching(
+        hash_params, cache_dir, write_json, clear_cache)
+
+    if cached_data is not None:
+        return cached_data
+
+    # If no cached data, proceed with the API request
    if data_type == 'Daily':
        data_url = 'https://waterservices.usgs.gov/nwis/dv'
-        api_query = '/?format=json&sites='+station+ \
-            '&startDT='+start_date+'&endDT='+end_date+ \
-            '&statCd=00003'+ \
+        api_query = '/?format=json&sites='+station + \
+            '&startDT='+start_date+'&endDT='+end_date + \
+            '&statCd=00003' + \
            '&parameterCd='+parameter+'&siteStatus=all'
    else:
        data_url = 'https://waterservices.usgs.gov/nwis/iv'
-        api_query = '/?format=json&sites='+station+ \
-            '&startDT='+start_date+'&endDT='+end_date+ \
+        api_query = '/?format=json&sites='+station + \
+            '&startDT='+start_date+'&endDT='+end_date + \
            '&parameterCd='+parameter+'&siteStatus=all'
-
+
    print('Data request URL: ', data_url+api_query)
-
-    response = requests.get(url=data_url+api_query,proxies=proxy)
+
+    response = requests.get(url=data_url+api_query, proxies=proxy)
    text = json.loads(response.text)
-
-    if write_json is not None:
-        with open(write_json, 'w') as outfile:
-            json.dump(text, outfile)
-
+
    data = _read_usgs_json(text)
-
-    return data
+
+    # After making the API request and processing the response, write the
+    # response to a cache file
+    handle_caching(hash_params, cache_dir, data=data,
+                   clear_cache_file=clear_cache)
+
+    if write_json:
+        shutil.copy(cache_filepath, write_json)
+
+    return data
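
With this change, request_usgs_data first checks ~/.cache/mhkit/usgs for a file keyed on the request parameters and only falls back to the web service on a miss; write_json now copies the cache file rather than dumping the raw response. A usage sketch under stated assumptions: the station and parameter codes below are illustrative examples, not values from this commit, and only the first call needs network access:

# Usage sketch: the second identical call is served from the local cache.
from mhkit.river.io import usgs

request = dict(
    station="15515500",      # example USGS gauge ID (illustrative)
    parameter="00060",       # discharge parameter code (illustrative)
    start_date="2009-08-01",
    end_date="2009-08-10",
    data_type="Daily",
)

data = usgs.request_usgs_data(**request)        # GET request, result cached
data_again = usgs.request_usgs_data(**request)  # read from ~/.cache/mhkit/usgs

# clear_cache=True discards the cached copy and forces a fresh request
fresh = usgs.request_usgs_data(clear_cache=True, **request)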
