2 changes: 2 additions & 0 deletions README.md
@@ -17,6 +17,7 @@ Recent released features
 | High-frequency trading example | [Part of code released](https://github.com/microsoft/qlib/pull/227) on Jan 28, 2021 |
 | High-frequency data(1min) | [Released](https://github.com/microsoft/qlib/pull/221) on Jan 27, 2021 |
 | Tabnet Model | [Released](https://github.com/microsoft/qlib/pull/205) on Jan 22, 2021 |
+| TCTS Model | [Released](https://github.com/microsoft/qlib/pull/491) on July 1, 2021 |
 
 Features released before 2021 are not listed here.

@@ -288,6 +289,7 @@ Here is a list of models built on `Qlib`.
 - [TFT based on tensorflow (Bryan Lim, et al. 2019)](examples/benchmarks/TFT/tft.py)
 - [TabNet based on pytorch (Sercan O. Arik, et al. 2019)](qlib/contrib/model/pytorch_tabnet.py)
 - [DoubleEnsemble based on LightGBM (Chuheng Zhang, et al. 2020)](qlib/contrib/model/double_ensemble.py)
+- [TCTS based on pytorch (Xueqing Wu, et al. 2021)](qlib/contrib/model/pytorch_tcts.py)
 
 Your PR of new Quant models is highly welcomed.

52 changes: 0 additions & 52 deletions examples/benchmarks/TCTS/TCTS.md

This file was deleted.

20 changes: 10 additions & 10 deletions examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml
@@ -22,11 +22,9 @@ data_handler_config: &data_handler_config
         - class: CSRankNorm
           kwargs:
             fields_group: label
-    label: ["Ref($close, -2) / Ref($close, -1) - 1",
-            "Ref($close, -3) / Ref($close, -1) - 1",
-            "Ref($close, -4) / Ref($close, -1) - 1",
-            "Ref($close, -5) / Ref($close, -1) - 1",
-            "Ref($close, -6) / Ref($close, -1) - 1"]
+    label: ["Ref($close, -1) / $close - 1",
+            "Ref($close, -2) / Ref($close, -1) - 1",
+            "Ref($close, -3) / Ref($close, -2) - 1"]
 port_analysis_config: &port_analysis_config
     strategy:
         class: TopkDropoutStrategy
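
The label change above swaps five overlapping horizons (each measured against Ref($close, -1)) for three consecutive one-day forward returns. As a minimal pandas sketch (not part of the PR) of what these expressions evaluate to, assuming Qlib's Ref(x, n) behaves like a shift with negative n looking |n| bars into the future; `ref`, `close`, and the label names here are illustrative only:

    import pandas as pd

    close = pd.Series([10.0, 10.2, 10.1, 10.4, 10.3])  # toy close prices

    def ref(series: pd.Series, n: int) -> pd.Series:
        # Qlib's Ref(x, n): the value n bars in the past for n > 0,
        # |n| bars in the future for n < 0.
        return series.shift(n)

    label_0 = ref(close, -1) / close - 1           # return from day t to t+1
    label_1 = ref(close, -2) / ref(close, -1) - 1  # return from day t+1 to t+2
    label_2 = ref(close, -3) / ref(close, -2) - 1  # return from day t+2 to t+3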
@@ -61,11 +59,12 @@ task:
             GPU: 0
             fore_optimizer: adam
             weight_optimizer: adam
-            output_dim: 5
-            fore_lr: 5e-7
-            weight_lr: 5e-7
+            output_dim: 3
+            fore_lr: 5e-4
+            weight_lr: 5e-4
             steps: 3
-            target_label: 0
+            target_label: 1
+            lowest_valid_performance: 0.993
     dataset:
         class: DatasetH
         module_path: qlib.data.dataset
@@ -87,7 +86,8 @@ task:
           kwargs:
             ana_long_short: False
             ann_scaler: 252
+            label_col: 1
         - class: PortAnaRecord
           module_path: qlib.workflow.record_temp
           kwargs:
-            config: *port_analysis_config
+            config: *port_analysis_config
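
With the config updated, the benchmark runs end to end through Qlib's usual entry points; the common route is `qrun examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml`. The sketch below does the equivalent from Python. It assumes a standard Qlib data setup and that the YAML carries the usual qlib_init section; treat it as a sketch, not the benchmark's official runner:

    import qlib
    import yaml
    from qlib.utils import init_instance_by_config

    with open("examples/benchmarks/TCTS/workflow_config_tcts_Alpha360.yaml") as f:
        config = yaml.safe_load(f)

    qlib.init(**config.get("qlib_init", {}))  # provider_uri, region, ...

    model = init_instance_by_config(config["task"]["model"])      # TCTS model
    dataset = init_instance_by_config(config["task"]["dataset"])  # DatasetH on Alpha360

    model.fit(dataset)  # retrains until the valid loss clears lowest_valid_performance
    pred = model.predict(dataset)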
107 changes: 67 additions & 40 deletions qlib/contrib/model/pytorch_tcts.py
@@ -9,12 +9,13 @@
 import numpy as np
 import pandas as pd
 import copy
+import random
 from sklearn.metrics import roc_auc_score, mean_squared_error
 import logging
 from ...utils import (
     unpack_archive_with_buffer,
     save_multiple_parts_file,
-    create_save_path,
+    get_or_create_path,
     drop_nan_by_y_index,
 )
 from ...log import get_module_logger, TimeInspector
@@ -60,8 +61,9 @@ def __init__(
         weight_lr=5e-7,
         steps=3,
         GPU=0,
-        seed=None,
+        seed=0,
         target_label=0,
+        lowest_valid_performance=0.993,
         **kwargs
     ):
         # Set logger.
@@ -85,6 +87,9 @@
         self.weight_lr = weight_lr
         self.steps = steps
         self.target_label = target_label
+        self.lowest_valid_performance = lowest_valid_performance
+        self._fore_optimizer = fore_optimizer
+        self._weight_optimizer = weight_optimizer
 
         self.logger.info(
             "TCTS parameters setting:"
@@ -113,40 +118,6 @@
             )
         )
 
-        if self.seed is not None:
-            np.random.seed(self.seed)
-            torch.manual_seed(self.seed)
-
-        self.fore_model = GRUModel(
-            d_feat=self.d_feat,
-            hidden_size=self.hidden_size,
-            num_layers=self.num_layers,
-            dropout=self.dropout,
-        )
-        self.weight_model = MLPModel(
-            d_feat=360 + 2 * self.output_dim + 1,
-            hidden_size=self.hidden_size,
-            num_layers=self.num_layers,
-            dropout=self.dropout,
-            output_dim=self.output_dim,
-        )
-        if fore_optimizer.lower() == "adam":
-            self.fore_optimizer = optim.Adam(self.fore_model.parameters(), lr=self.fore_lr)
-        elif fore_optimizer.lower() == "gd":
-            self.fore_optimizer = optim.SGD(self.fore_model.parameters(), lr=self.fore_lr)
-        else:
-            raise NotImplementedError("optimizer {} is not supported!".format(fore_optimizer))
-        if weight_optimizer.lower() == "adam":
-            self.weight_optimizer = optim.Adam(self.weight_model.parameters(), lr=self.weight_lr)
-        elif weight_optimizer.lower() == "gd":
-            self.weight_optimizer = optim.SGD(self.weight_model.parameters(), lr=self.weight_lr)
-        else:
-            raise NotImplementedError("optimizer {} is not supported!".format(weight_optimizer))
-
-        self.fitted = False
-        self.fore_model.to(self.device)
-        self.weight_model.to(self.device)
 
     def loss_fn(self, pred, label, weight):
 
         loc = torch.argmax(weight, 1)
@@ -258,11 +229,9 @@ def test_epoch(self, data_x, data_y):
     def fit(
         self,
         dataset: DatasetH,
-        evals_result=dict(),
         verbose=True,
         save_path=None,
     ):
-
         df_train, df_valid, df_test = dataset.prepare(
             ["train", "valid", "test"],
             col_set=["feature", "label"],
@@ -274,7 +243,62 @@
         x_test, y_test = df_test["feature"], df_test["label"]
 
         if save_path == None:
-            save_path = create_save_path(save_path)
+            save_path = get_or_create_path(save_path)
+        best_loss = np.inf
+        while best_loss > self.lowest_valid_performance:
+            if best_loss < np.inf:
+                print("Failed! Start retraining.")
+                self.seed = random.randint(0, 1000)  # reset random seed
+
+            if self.seed is not None:
+                np.random.seed(self.seed)
+                torch.manual_seed(self.seed)
+
+            best_loss = self.training(
+                x_train, y_train, x_valid, y_valid, x_test, y_test, verbose=verbose, save_path=save_path
+            )
+
+    def training(
+        self,
+        x_train,
+        y_train,
+        x_valid,
+        y_valid,
+        x_test,
+        y_test,
+        verbose=True,
+        save_path=None,
+    ):
+
+        self.fore_model = GRUModel(
+            d_feat=self.d_feat,
+            hidden_size=self.hidden_size,
+            num_layers=self.num_layers,
+            dropout=self.dropout,
+        )
+        self.weight_model = MLPModel(
+            d_feat=360 + 2 * self.output_dim + 1,
+            hidden_size=self.hidden_size,
+            num_layers=self.num_layers,
+            dropout=self.dropout,
+            output_dim=self.output_dim,
+        )
+        if self._fore_optimizer.lower() == "adam":
+            self.fore_optimizer = optim.Adam(self.fore_model.parameters(), lr=self.fore_lr)
+        elif self._fore_optimizer.lower() == "gd":
+            self.fore_optimizer = optim.SGD(self.fore_model.parameters(), lr=self.fore_lr)
+        else:
+            raise NotImplementedError("optimizer {} is not supported!".format(self._fore_optimizer))
+        if self._weight_optimizer.lower() == "adam":
+            self.weight_optimizer = optim.Adam(self.weight_model.parameters(), lr=self.weight_lr)
+        elif self._weight_optimizer.lower() == "gd":
+            self.weight_optimizer = optim.SGD(self.weight_model.parameters(), lr=self.weight_lr)
+        else:
+            raise NotImplementedError("optimizer {} is not supported!".format(self._weight_optimizer))
+
+        self.fitted = False
+        self.fore_model.to(self.device)
+        self.weight_model.to(self.device)
+
         best_loss = np.inf
         best_epoch = 0
@@ -291,7 +315,8 @@
             val_loss = self.test_epoch(x_valid, y_valid)
             test_loss = self.test_epoch(x_test, y_test)
 
-            print("valid %.6f, test %.6f" % (val_loss, test_loss))
+            if verbose:
+                print("valid %.6f, test %.6f" % (val_loss, test_loss))
 
             if val_loss < best_loss:
                 best_loss = val_loss
@@ -316,6 +341,8 @@
         if self.use_gpu:
             torch.cuda.empty_cache()
 
+        return best_loss
+
     def predict(self, dataset):
         if not self.fitted:
             raise ValueError("model is not fitted yet!")
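
The heart of this change is the new retraining loop in fit: training() now runs one full training pass and returns its best validation loss, while fit() keeps drawing fresh seeds and retraining from scratch until that loss clears lowest_valid_performance. Reduced to its essential pattern (a sketch with illustrative names, not the PR's exact code):

    import random

    def fit_until_good_enough(train_once, lowest_valid_performance, seed=0):
        # train_once(seed) runs one full training pass and returns the best valid loss.
        best_loss = float("inf")
        while best_loss > lowest_valid_performance:
            if best_loss < float("inf"):        # a previous attempt fell short
                seed = random.randint(0, 1000)  # reset the random seed and retry
            best_loss = train_once(seed)
        return best_loss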
7 changes: 4 additions & 3 deletions qlib/workflow/record_temp.py
@@ -227,10 +227,11 @@ class SigAnaRecord(SignalRecord):

     artifact_path = "sig_analysis"
 
-    def __init__(self, recorder, ana_long_short=False, ann_scaler=252, **kwargs):
+    def __init__(self, recorder, ana_long_short=False, ann_scaler=252, label_col=0, **kwargs):
         super().__init__(recorder=recorder, **kwargs)
         self.ana_long_short = ana_long_short
         self.ann_scaler = ann_scaler
+        self.label_col = label_col
 
     def generate(self, **kwargs):
         try:
@@ -243,7 +244,7 @@ def generate(self, **kwargs):
             if label is None or not isinstance(label, pd.DataFrame) or label.empty:
                 logger.warn(f"Empty label.")
                 return
-            ic, ric = calc_ic(pred.iloc[:, 0], label.iloc[:, 0])
+            ic, ric = calc_ic(pred.iloc[:, 0], label.iloc[:, self.label_col])
             metrics = {
                 "IC": ic.mean(),
                 "ICIR": ic.mean() / ic.std(),
@@ -252,7 +253,7 @@
             }
             objects = {"ic.pkl": ic, "ric.pkl": ric}
             if self.ana_long_short:
-                long_short_r, long_avg_r = calc_long_short_return(pred.iloc[:, 0], label.iloc[:, 0])
+                long_short_r, long_avg_r = calc_long_short_return(pred.iloc[:, 0], label.iloc[:, self.label_col])
                 metrics.update(
                     {
                         "Long-Short Ann Return": long_short_r.mean() * self.ann_scaler,