Skip to content

Commit 809cd22

Browse files
authored
Arch update 21/11 - let's simplify it a bit ;) (#1346)
* a few simplifications * docs simplifications * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * contrib simplification * fix
1 parent ac8567d commit 809cd22

File tree

236 files changed

+1409
-6317
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

236 files changed

+1409
-6317
lines changed

CHANGELOG.md

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,36 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
1212

1313
### Changed
1414

15-
-
15+
- A few framework simplifications were made ([#1346](https://github.com/catalyst-team/catalyst/pull/1346)):
16+
- `catalyst-contrib` scripts reduced to `collect-env` and `project-embeddings` only
17+
- `catalyst-dl` scripts reduced to `run` and `tune` only
18+
- `transforms.` prefix deprecated for Catalyst-based transforms
19+
- `catalyst.tools` moved to `catalyst.extras`
20+
- task-dependent extensions from `catalyst.data` moved to `catalyst.contrib.data`
21+
- `catalyst.data.transforms` moved to `catalyst.contrib.data.transforms`
22+
- `Normalize`, `ToTensor` transforms renamed to `NormalizeImage`, `ImageToTensor`
23+
- metric learning extensions moved to `catalyst.contrib.data`
24+
- `catalyst.contrib` moved to code-as-a-documentation development
25+
- `catalyst[cv]` and `catalyst[ml]` extensions moved to flatten architecture design; examples: `catalyst.contrib.data.dataset_cv`, `catalyst.contrib.data.dataset_ml`
26+
- `catalyst.contrib` moved to flatten architecture design; examples: `catalyst.contrib.data`, `catalyst.contrib.datasets`, `catalyst.contrib.layers`, `catalyst.contrib.models`, `catalyst.contrib.optimizers`, `catalyst.contrib.schedulers`
27+
- internal functionality moved to `***._misc` modules
28+
- `catalyst.utils.mixup` moved to `catalyst.utils.torch`
29+
- `catalyst.utils.numpy` moved to `catalyst.contrib.utils.numpy`
30+
1631

1732
### Removed
1833

19-
-
34+
- A few framework simplifications were made ([#1346](https://github.com/catalyst-team/catalyst/pull/1346)):
35+
- `catalyst.contrib.pandas`
36+
- `catalyst.contrib.parallel`
37+
- `catalyst.contrib.models.cv`
38+
- a few `catalyst.utils.misc` functions
39+
- `catalyst.extras` removed from the public documentation
40+
2041

2142
### Fixed
2243

23-
-
44+
- documentation search error (21.10 only) ([#1346](https://github.com/catalyst-team/catalyst/pull/1346))
2445

2546

2647
## [21.10] - 2021-10-30
@@ -498,7 +519,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
498519
- Runner registry support for Config API ([#936](https://github.com/catalyst-team/catalyst/pull/936))
499520
- `catalyst-dl tune` command - Optuna with Config API integration for AutoML hyperparameters optimization ([#937](https://github.com/catalyst-team/catalyst/pull/937))
500521
- `OptunaPruningCallback` alias for `OptunaCallback` ([#937](https://github.com/catalyst-team/catalyst/pull/937))
501-
- AdamP and SGDP to `catalyst.contrib.nn.criterion` ([#942](https://github.com/catalyst-team/catalyst/pull/942))
522+
- AdamP and SGDP to `catalyst.contrib.losses` ([#942](https://github.com/catalyst-team/catalyst/pull/942))
502523

503524
### Changed
504525

README.md

Lines changed: 46 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,14 @@ import os
6161
from torch import nn, optim
6262
from torch.utils.data import DataLoader
6363
from catalyst import dl, utils
64-
from catalyst.data import ToTensor
65-
from catalyst.contrib.datasets import MNIST
64+
from catalyst.contrib import ImageToTensor, MNIST
6665

6766
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
6867
criterion = nn.CrossEntropyLoss()
6968
optimizer = optim.Adam(model.parameters(), lr=0.02)
7069

71-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
72-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
70+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
71+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
7372
loaders = {
7473
"train": DataLoader(train_data, batch_size=32),
7574
"valid": DataLoader(valid_data, batch_size=32),
@@ -105,7 +104,7 @@ metrics = runner.evaluate_loader(
105104
loader=loaders["valid"],
106105
callbacks=[dl.AccuracyCallback(input_key="logits", target_key="targets", topk_args=(1, 3, 5))],
107106
)
108-
assert "accuracy" in metrics.keys()
107+
assert "accuracy01" in metrics.keys()
109108

110109
# model inference
111110
for prediction in runner.predict_loader(loader=loaders["valid"]):
@@ -115,13 +114,13 @@ features_batch = next(iter(loaders["valid"]))[0]
115114
# model stochastic weight averaging
116115
model.load_state_dict(utils.get_averaged_weights_by_path_mask(logdir="./logs", path_mask="*.pth"))
117116
# model tracing
118-
utils.trace_model(model=runner.model, batch=features_batch)
117+
utils.trace_model(model=runner.model.cpu(), batch=features_batch)
119118
# model quantization
120119
utils.quantize_model(model=runner.model)
121120
# model pruning
122121
utils.prune_model(model=runner.model, pruning_fn="l1_unstructured", amount=0.8)
123122
# onnx export
124-
utils.onnx_export(model=runner.model, batch=features_batch, file="./logs/mnist.onnx", verbose=True)
123+
utils.onnx_export(model=runner.model.cpu(), batch=features_batch, file="./logs/mnist.onnx", verbose=True)
125124
```
126125

127126
### Step-by-step Guide
@@ -248,14 +247,13 @@ from torch import nn, optim
248247
from torch.nn import functional as F
249248
from torch.utils.data import DataLoader
250249
from catalyst import dl, metrics
251-
from catalyst.data import ToTensor
252-
from catalyst.contrib.datasets import MNIST
250+
from catalyst.contrib import ImageToTensor, MNIST
253251

254252
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
255253
optimizer = optim.Adam(model.parameters(), lr=0.02)
256254

257-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
258-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
255+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
256+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
259257
loaders = {
260258
"train": DataLoader(train_data, batch_size=32),
261259
"valid": DataLoader(valid_data, batch_size=32),
@@ -325,7 +323,7 @@ for logits in runner.predict_loader(loader=loaders["valid"]):
325323
<p>
326324

327325
```python
328-
import torch
326+
import torch
329327
from torch.utils.data import DataLoader, TensorDataset
330328
from catalyst import dl
331329

@@ -651,15 +649,14 @@ import os
651649
from torch import nn, optim
652650
from torch.utils.data import DataLoader
653651
from catalyst import dl
654-
from catalyst.data import ToTensor
655-
from catalyst.contrib.datasets import MNIST
652+
from catalyst.contrib import ImageToTensor, MNIST
656653

657654
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
658655
criterion = nn.CrossEntropyLoss()
659656
optimizer = optim.Adam(model.parameters(), lr=0.02)
660657

661-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
662-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
658+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
659+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
663660
loaders = {
664661
"train": DataLoader(train_data, batch_size=32),
665662
"valid": DataLoader(valid_data, batch_size=32),
@@ -706,9 +703,7 @@ import torch
706703
from torch import nn
707704
from torch.utils.data import DataLoader
708705
from catalyst import dl
709-
from catalyst.data import ToTensor
710-
from catalyst.contrib.datasets import MNIST
711-
from catalyst.contrib.nn import IoULoss
706+
from catalyst.contrib import ImageToTensor, IoULoss, MNIST
712707

713708

714709
model = nn.Sequential(
@@ -718,8 +713,8 @@ model = nn.Sequential(
718713
criterion = IoULoss()
719714
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
720715

721-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
722-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
716+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
717+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
723718
loaders = {
724719
"train": DataLoader(train_data, batch_size=32),
725720
"valid": DataLoader(valid_data, batch_size=32),
@@ -769,17 +764,16 @@ from torch import nn, optim
769764
from torch.nn import functional as F
770765
from torch.utils.data import DataLoader
771766
from catalyst import dl
772-
from catalyst.data import ToTensor
773-
from catalyst.contrib.datasets import MNIST
767+
from catalyst.contrib import ImageToTensor, MNIST
774768

775769
# [!] teacher model should be already pretrained
776770
teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
777771
student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
778772
criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
779773
optimizer = optim.Adam(student.parameters(), lr=0.02)
780774

781-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
782-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
775+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
776+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
783777
loaders = {
784778
"train": DataLoader(train_data, batch_size=32),
785779
"valid": DataLoader(valid_data, batch_size=32),
@@ -801,23 +795,13 @@ class DistilRunner(dl.Runner):
801795

802796
runner = DistilRunner()
803797
callbacks = [
804-
dl.AccuracyCallback(
805-
input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"
806-
),
807-
dl.AccuracyCallback(
808-
input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"
809-
),
810-
dl.CriterionCallback(
811-
input_key="s_logits", target_key="targets", metric_key="cls_loss", criterion_key="cls"
812-
),
813-
dl.CriterionCallback(
814-
input_key="s_logprobs", target_key="t_probs", metric_key="kl_div_loss", criterion_key="kl"
815-
),
798+
dl.AccuracyCallback(input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"),
799+
dl.AccuracyCallback(input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"),
800+
dl.CriterionCallback(input_key="s_logits", target_key="targets", metric_key="cls_loss", criterion_key="cls"),
801+
dl.CriterionCallback(input_key="s_logprobs", target_key="t_probs", metric_key="kl_div_loss", criterion_key="kl"),
816802
dl.MetricAggregationCallback(metric_key="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"),
817803
dl.OptimizerCallback(metric_key="loss", model_key="student"),
818-
dl.CheckpointCallback(
819-
logdir="./logs", loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
820-
),
804+
dl.CheckpointCallback(logdir="./logs", loader_key="valid", metric_key="loss", minimize=True, save_n_best=3),
821805
]
822806
# model training
823807
runner.train(
@@ -843,16 +827,19 @@ runner.train(
843827
import os
844828
from torch.optim import Adam
845829
from torch.utils.data import DataLoader
846-
from catalyst import data, dl
847-
from catalyst.contrib import datasets, models, nn
848-
from catalyst.data.transforms import Compose, Normalize, ToTensor
830+
from catalyst import dl
831+
from catalyst.data import BatchBalanceClassSampler
832+
from catalyst.contrib import data, datasets, models, nn
849833

850834

851835
# 1. train and valid loaders
852-
transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
836+
transforms = data.Compose([
837+
data.ImageToTensor(),
838+
data.NormalizeImage((0.1307,), (0.3081,))
839+
])
853840

854841
train_dataset = datasets.MnistMLDataset(root=os.getcwd(), download=True, transform=transforms)
855-
sampler = data.BatchBalanceClassSampler(
842+
sampler = BatchBalanceClassSampler(
856843
labels=train_dataset.get_labels(), num_classes=5, num_samples=10, num_batches=10
857844
)
858845
train_loader = DataLoader(dataset=train_dataset, batch_sampler=sampler)
@@ -929,9 +916,7 @@ import torch
929916
from torch import nn
930917
from torch.utils.data import DataLoader
931918
from catalyst import dl
932-
from catalyst.contrib.datasets import MNIST
933-
from catalyst.contrib.nn.modules import Flatten, GlobalMaxPool2d, Lambda
934-
from catalyst.data import ToTensor
919+
from catalyst.contrib import Flatten, GlobalMaxPool2d, Lambda, MNIST, ImageToTensor
935920

936921
latent_dim = 128
937922
generator = nn.Sequential(
@@ -962,7 +947,7 @@ optimizer = {
962947
"generator": torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
963948
"discriminator": torch.optim.Adam(discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
964949
}
965-
train_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
950+
train_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
966951
loaders = {"train": DataLoader(train_data, batch_size=32)}
967952

968953
class CustomRunner(dl.Runner):
@@ -1070,8 +1055,7 @@ from torch import nn, optim
10701055
from torch.nn import functional as F
10711056
from torch.utils.data import DataLoader
10721057
from catalyst import dl, metrics
1073-
from catalyst.contrib.datasets import MNIST
1074-
from catalyst.data import ToTensor
1058+
from catalyst.contrib import ImageToTensor, MNIST
10751059

10761060
LOG_SCALE_MAX = 2
10771061
LOG_SCALE_MIN = -10
@@ -1124,8 +1108,8 @@ class CustomRunner(dl.IRunner):
11241108
return 3
11251109

11261110
def get_loaders(self, stage: str):
1127-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
1128-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
1111+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
1112+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
11291113
loaders = {
11301114
"train": DataLoader(train_data, batch_size=32),
11311115
"valid": DataLoader(valid_data, batch_size=32),
@@ -1197,8 +1181,7 @@ import os
11971181
from torch import nn, optim
11981182
from torch.utils.data import DataLoader
11991183
from catalyst import dl, utils
1200-
from catalyst.contrib.datasets import MNIST
1201-
from catalyst.data import ToTensor
1184+
from catalyst.contrib import ImageToTensor, MNIST
12021185

12031186

12041187
class CustomRunner(dl.IRunner):
@@ -1225,8 +1208,8 @@ class CustomRunner(dl.IRunner):
12251208
return 3
12261209

12271210
def get_loaders(self, stage: str):
1228-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
1229-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
1211+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
1212+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
12301213
loaders = {
12311214
"train": DataLoader(train_data, batch_size=32),
12321215
"valid": DataLoader(valid_data, batch_size=32),
@@ -1305,8 +1288,7 @@ import os
13051288
from torch import nn, optim
13061289
from torch.utils.data import DataLoader
13071290
from catalyst import dl, utils
1308-
from catalyst.contrib.datasets import MNIST
1309-
from catalyst.data import ToTensor
1291+
from catalyst.contrib import ImageToTensor, MNIST
13101292

13111293

13121294
class CustomRunner(dl.IRunner):
@@ -1332,8 +1314,8 @@ class CustomRunner(dl.IRunner):
13321314
return 3
13331315

13341316
def get_loaders(self, stage: str):
1335-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
1336-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
1317+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
1318+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
13371319
loaders = {
13381320
"train": DataLoader(train_data, batch_size=32),
13391321
"valid": DataLoader(valid_data, batch_size=32),
@@ -1420,16 +1402,15 @@ import torch
14201402
from torch import nn
14211403
from torch.utils.data import DataLoader
14221404
from catalyst import dl
1423-
from catalyst.data import ToTensor
1424-
from catalyst.contrib.datasets import MNIST
1405+
from catalyst.contrib import ImageToTensor, MNIST
14251406

14261407

14271408
def objective(trial):
14281409
lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
14291410
num_hidden = int(trial.suggest_loguniform("num_hidden", 32, 128))
14301411

1431-
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
1432-
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())
1412+
train_data = MNIST(os.getcwd(), train=True, download=True, transform=ImageToTensor())
1413+
valid_data = MNIST(os.getcwd(), train=False, download=True, transform=ImageToTensor())
14331414
loaders = {
14341415
"train": DataLoader(train_data, batch_size=32),
14351416
"valid": DataLoader(valid_data, batch_size=32),

0 commit comments

Comments
 (0)