Skip to content

Commit 582f09f

Browse files
Add basin filtering
Use basin filtering everywhere; More filtering; More filtering; Fix accidental removal; Add basin reduction options; Clean up filtering code; Add min 1 basin region pair req; Add reg_to_basin return tech consolidate return logic; Fix failing tests. The tests failed due to a missing valid_basins list; adding the list to context fixes the issue.
1 parent 5b1449f commit 582f09f

File tree

13 files changed

+273
-58
lines changed

13 files changed

+273
-58
lines changed

message_ix_models/model/water/build.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from message_ix_models.model.structure import get_codes
1212
from message_ix_models.util import broadcast, package_data_path
1313

14-
from .utils import read_config
14+
from .utils import filter_basins_by_region, read_config
1515

1616
log = logging.getLogger(__name__)
1717

@@ -557,6 +557,10 @@ def map_basin(context: Context) -> Mapping[str, ScenarioInfo]:
557557
PATH = package_data_path("water", "delineation", FILE)
558558

559559
df = pd.read_csv(PATH)
560+
561+
# Apply basin filter to reduce number of basins per region
562+
df = filter_basins_by_region(df, context)
563+
560564
# Assigning proper nomenclature
561565
df["node"] = "B" + df["BCU_name"].astype(str)
562566
df["mode"] = "M" + df["BCU_name"].astype(str)
@@ -578,6 +582,8 @@ def map_basin(context: Context) -> Mapping[str, ScenarioInfo]:
578582
results["map_node"] = nodes
579583

580584
context.all_nodes = df["node"]
585+
# Store the filtered basin names for use in other functions
586+
context.valid_basins = set(df["BCU_name"].astype(str))
581587

582588
for set_name, config in results.items():
583589
# Sets to add

message_ix_models/model/water/cli.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,44 @@ def water_ini(context: "Context", regions, time):
119119
is_flag=True,
120120
help="Defines whether the model solves with macro",
121121
)
122+
@click.option(
123+
"--reduced-basin/--no-reduced-basin",
124+
default=False,
125+
help="Enable reduced basin filtering",
126+
)
127+
@click.option(
128+
"--filter-list",
129+
multiple=True,
130+
help="Specific basins to include (can be used multiple times)",
131+
)
132+
@click.option(
133+
"--num-basins",
134+
type=int,
135+
help="Number of basins per region to keep when reduced-basin is enabled",
136+
)
122137
@common_params("regions")
123138
@scenario_param("--ssp")
124-
def nexus_cli(context: "Context", regions, rcps, sdgs, rels, macro=False):
139+
def nexus_cli(
140+
context: "Context",
141+
regions,
142+
rcps,
143+
sdgs,
144+
rels,
145+
macro=False,
146+
reduced_basin=False,
147+
filter_list=None,
148+
num_basins=None,
149+
):
125150
"""
126151
Add basin structure connected to the energy sector and
127152
water balance linking different water demands to supply.
128153
"""
154+
# Set basin filtering configuration on context
155+
context.reduced_basin = reduced_basin
156+
if filter_list:
157+
context.filter_list = list(filter_list)
158+
if num_basins is not None:
159+
context.num_basins = num_basins
129160

130161
nexus(context, regions, rcps, sdgs, rels, macro)
131162

message_ix_models/model/water/data/demands.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ def add_sectoral_demands(context: "Context") -> dict[str, pd.DataFrame]:
218218
df_dmds.sort_values(["year", "node", "variable", "value"], inplace=True)
219219

220220
df_dmds["time"] = "year"
221+
222+
# Filter to only include basins that exist after basin filtering
223+
df_dmds = df_dmds[df_dmds["node"].isin(context.valid_basins)]
221224

222225
# Write final interpolated values as csv
223226
# df2_f.to_csv('final_interpolated_values.csv')
@@ -240,6 +243,9 @@ def add_sectoral_demands(context: "Context") -> dict[str, pd.DataFrame]:
240243
)
241244
df_m = df_m[["year", "pid", "variable", "value", "month"]]
242245
df_m.columns = pd.Index(["year", "node", "variable", "value", "time"])
246+
247+
# Filter monthly data to only include valid basins
248+
df_m = df_m[df_m["node"].isin(context.valid_basins)]
243249

244250
# remove yearly parts from df_dms
245251
df_dmds = df_dmds[
@@ -769,13 +775,11 @@ def read_water_availability(context: "Context") -> Sequence[pd.DataFrame]:
769775
"water", "delineation", f"basins_by_region_simpl_{context.regions}.csv"
770776
)
771777
df_x = pd.read_csv(PATH)
778+
779+
# Filter to only include valid basins
780+
df_x = df_x[df_x["BCU_name"].isin(context.valid_basins)]
772781

773782
if "year" in context.time:
774-
# path for reading basin delineation file
775-
PATH = package_data_path(
776-
"water", "delineation", f"basins_by_region_simpl_{context.regions}.csv"
777-
)
778-
df_x = pd.read_csv(PATH)
779783
# Adding freshwater supply constraints
780784
# Reading data, the data is spatially and temporally aggregated from GHMs
781785
path1 = package_data_path(
@@ -786,6 +790,14 @@ def read_water_availability(context: "Context") -> Sequence[pd.DataFrame]:
786790
# Read rcp 2.6 data
787791
df_sw = pd.read_csv(path1)
788792
df_sw.drop(["Unnamed: 0"], axis=1, inplace=True)
793+
794+
# Filter rows to only include valid basins
795+
# The columns are years, so we need to filter rows based on the original basin order
796+
# First, get the indices of valid basins from the original full list
797+
full_basin_df = pd.read_csv(PATH) # Read full basin list again
798+
valid_indices = full_basin_df[full_basin_df["BCU_name"].isin(context.valid_basins)].index
799+
df_sw = df_sw.iloc[valid_indices] # Keep only rows for valid basins
800+
df_sw.reset_index(drop=True, inplace=True)
789801

790802
df_sw.index = df_x["BCU_name"].index
791803
df_sw = df_sw.stack().reset_index()
@@ -811,6 +823,11 @@ def read_water_availability(context: "Context") -> Sequence[pd.DataFrame]:
811823
# Read groundwater data
812824
df_gw = pd.read_csv(path1)
813825
df_gw.drop(["Unnamed: 0"], axis=1, inplace=True)
826+
827+
# Filter to only include valid basins (same as df_sw)
828+
df_gw = df_gw.iloc[valid_indices] # Use same valid_indices from above
829+
df_gw.reset_index(drop=True, inplace=True)
830+
814831
df_gw.index = df_x["BCU_name"].index
815832
df_gw = df_gw.stack().reset_index()
816833
df_gw.columns = pd.Index(["Region", "years", "value"])
@@ -834,6 +851,12 @@ def read_water_availability(context: "Context") -> Sequence[pd.DataFrame]:
834851
)
835852
df_sw = pd.read_csv(path1)
836853
df_sw.drop(["Unnamed: 0"], axis=1, inplace=True)
854+
855+
# Filter to only include valid basins
856+
full_basin_df = pd.read_csv(PATH) # Read full basin list again
857+
valid_indices = full_basin_df[full_basin_df["BCU_name"].isin(context.valid_basins)].index
858+
df_sw = df_sw.iloc[valid_indices]
859+
df_sw.reset_index(drop=True, inplace=True)
837860

838861
df_sw.index = df_x["BCU_name"].index
839862
df_sw = df_sw.stack().reset_index()
@@ -857,6 +880,10 @@ def read_water_availability(context: "Context") -> Sequence[pd.DataFrame]:
857880
)
858881
df_gw = pd.read_csv(path1)
859882
df_gw.drop(["Unnamed: 0"], axis=1, inplace=True)
883+
884+
# Filter to only include valid basins (same as df_sw)
885+
df_gw = df_gw.iloc[valid_indices] # Use same valid_indices from above
886+
df_gw.reset_index(drop=True, inplace=True)
860887

861888
df_gw.index = df_x["BCU_name"].index
862889
df_gw = df_gw.stack().reset_index()

message_ix_models/model/water/data/infrastructure.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@ def add_infrastructure_techs(context: "Context") -> dict[str, pd.DataFrame]:
224224
PATH = package_data_path("water", "delineation", FILE2)
225225

226226
df_node = pd.read_csv(PATH)
227+
228+
# Filter to only valid basins (already filtered in map_basin)
229+
df_node = df_node[df_node["BCU_name"].isin(context.valid_basins)]
230+
227231
# Assigning proper nomenclature
228232
df_node["node"] = "B" + df_node["BCU_name"].astype(str)
229233
df_node["mode"] = "M" + df_node["BCU_name"].astype(str)
@@ -825,6 +829,10 @@ def add_desalination(context: "Context") -> dict[str, pd.DataFrame]:
825829
PATH = package_data_path("water", "delineation", FILE2)
826830

827831
df_node = pd.read_csv(PATH)
832+
833+
# Filter to only valid basins (already filtered in map_basin)
834+
df_node = df_node[df_node["BCU_name"].isin(context.valid_basins)]
835+
828836
# Assigning proper nomenclature
829837
df_node["node"] = "B" + df_node["BCU_name"].astype(str)
830838
df_node["mode"] = "M" + df_node["BCU_name"].astype(str)
@@ -833,6 +841,11 @@ def add_desalination(context: "Context") -> dict[str, pd.DataFrame]:
833841
if context.type_reg == "country"
834842
else f"{context.regions}_" + df_node["REGION"].astype(str)
835843
)
844+
845+
# Filter historical and projected data to only include basins that exist after filtering
846+
df_hist = df_hist[df_hist["BCU_name"].isin(context.valid_basins)]
847+
df_proj = df_proj[df_proj["BCU_name"].isin(context.valid_basins)]
848+
836849
# output dataframe linking to desal tech types
837850
out_df = (
838851
make_df(

message_ix_models/model/water/data/irrigation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ def add_irr_structure(context: "Context") -> dict[str, pd.DataFrame]:
3232
FILE2 = f"basins_by_region_simpl_{context.regions}.csv"
3333
PATH = package_data_path("water", "delineation", FILE2)
3434
df_node = pd.read_csv(PATH)
35+
36+
# Filter to only include valid basins
37+
df_node = df_node[df_node["BCU_name"].isin(context.valid_basins)]
38+
3539
# Assigning proper nomenclature
3640
df_node["node"] = "B" + df_node["BCU_name"].astype(str)
3741
df_node["mode"] = "M" + df_node["BCU_name"].astype(str)

message_ix_models/model/water/data/water_supply.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
KM3_TO_MCM,
1212
USD_KM3_TO_USD_MCM,
1313
GWa_KM3_TO_GWa_MCM,
14+
filter_basins_by_region,
1415
get_vintage_and_active_years,
1516
)
1617
from message_ix_models.util import (
@@ -42,6 +43,9 @@ def map_basin_region_wat(context: "Context") -> pd.DataFrame:
4243
"water", "delineation", f"basins_by_region_simpl_{context.regions}.csv"
4344
)
4445
df_x = pd.read_csv(PATH)
46+
47+
# Filter to only include valid basins
48+
df_x = df_x[df_x["BCU_name"].isin(context.valid_basins)]
4549
# Adding freshwater supply constraints
4650
# Reading data, the data is spatially and temporally aggregated from GHMs
4751
path1 = package_data_path(
@@ -94,6 +98,9 @@ def map_basin_region_wat(context: "Context") -> pd.DataFrame:
9498
"water", "delineation", f"basins_by_region_simpl_{context.regions}.csv"
9599
)
96100
df_x = pd.read_csv(PATH)
101+
102+
# Filter to only include valid basins
103+
df_x = df_x[df_x["BCU_name"].isin(context.valid_basins)]
97104

98105
# Reading data, the data is spatially and temporally aggregated from GHMs
99106
df_sw["BCU_name"] = df_x["BCU_name"]
@@ -165,6 +172,10 @@ def add_water_supply(context: "Context") -> dict[str, pd.DataFrame]:
165172
PATH = package_data_path("water", "delineation", FILE)
166173

167174
df_node = pd.read_csv(PATH)
175+
176+
# Apply basin filter to reduce number of basins per region
177+
df_node = filter_basins_by_region(df_node, context)
178+
168179
# Assigning proper nomenclature
169180
df_node["node"] = "B" + df_node["BCU_name"].astype(str)
170181
df_node["mode"] = "M" + df_node["BCU_name"].astype(str)
@@ -191,6 +202,10 @@ def add_water_supply(context: "Context") -> dict[str, pd.DataFrame]:
191202
FILE2 = f"historical_new_cap_gw_sw_km3_year_{context.regions}.csv"
192203
PATH2 = package_data_path("water", "availability", FILE2)
193204
df_hist = pd.read_csv(PATH2)
205+
206+
# Filter to only include valid basins
207+
df_hist = df_hist[df_hist["BCU_name"].isin(context.valid_basins)]
208+
194209
df_hist["BCU_name"] = "B" + df_hist["BCU_name"].astype(str)
195210

196211
if context.nexus_set == "cooling":

0 commit comments

Comments
 (0)