
Commit 81e5084

dbogunowicz <bogunowicz@arrival.com>, bfineran, and KSGulin committed
[Fix] compose_staged failing with modifiers with dynamically inferred end_epoch (#682)
* initial commit
* added incrementing of epoch_end in base_stages plus a unit test
* Correct the formatting of the test input
* Merge tests together

Co-authored-by: [email protected] <[email protected]>
Co-authored-by: Benjamin Fineran <[email protected]>
Co-authored-by: Konstantin Gulin <[email protected]>
1 parent da964e9 · commit 81e5084

File tree: 2 files changed, +112 -1 lines changed

src/sparseml/optim/manager.py
Lines changed: 20 additions & 1 deletion

@@ -223,9 +223,28 @@ def compose_staged(
         if not keep_original_epochs:
             # update additional modifier epochs
             base_end_epoch = base_recipe.max_epochs
+
+            # make sure that for the modifiers in base_stages
+            # with the initial attribute `end_epoch` = -1,
+            # this attribute value is replaced with `base_end_epoch`
+            for base_modifiers in base_stages.values():
+                for base_modifier in base_modifiers:
+                    if (
+                        hasattr(base_modifier, "end_epoch")
+                        and base_modifier.end_epoch == -1
+                    ):
+                        base_modifier._init_end = base_end_epoch
+                        base_modifier.end_epoch = base_end_epoch
+
             for additional_modifiers in additional_stages.values():
                 for additional_modifier in additional_modifiers:
-                    if hasattr(additional_modifier, "end_epoch"):
+                    if (
+                        hasattr(additional_modifier, "end_epoch")
+                        and additional_modifier.end_epoch != -1
+                    ):
+                        # if end_epoch == -1, the .end_epoch is being
+                        # assumed implicitly and does not need to be
+                        # incremented
                         additional_modifier.end_epoch += base_end_epoch
                     if hasattr(additional_modifier, "start_epoch"):
                         additional_modifier.start_epoch += base_end_epoch
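
To illustrate what this hunk does, here is a minimal, self-contained sketch of the epoch arithmetic. The `Modifier` dataclass and `compose_epochs` helper below are hypothetical stand-ins that only mirror the two loops above; they are not SparseML's real classes.

```python
from dataclasses import dataclass


@dataclass
class Modifier:
    """Stand-in for a SparseML modifier; only the fields used here."""

    start_epoch: float = 0.0
    end_epoch: float = -1.0  # -1 means "run until the recipe ends"


def compose_epochs(base_stages, additional_stages, base_end_epoch):
    """Mirror the epoch handling of the patched compose_staged hunk."""
    # pin implicit end epochs (-1) in the base stages to the base
    # recipe's final epoch, as the new first loop in the patch does
    for base_modifiers in base_stages.values():
        for modifier in base_modifiers:
            if modifier.end_epoch == -1:
                modifier.end_epoch = base_end_epoch
    # shift the additional stages to start after the base recipe; an
    # implicit end_epoch of -1 stays -1 rather than being incremented
    for additional_modifiers in additional_stages.values():
        for modifier in additional_modifiers:
            if modifier.end_epoch != -1:
                modifier.end_epoch += base_end_epoch
            modifier.start_epoch += base_end_epoch


base = {"stage_0": [Modifier(start_epoch=0.0, end_epoch=-1)]}
extra = {"stage_1": [Modifier(start_epoch=0.0, end_epoch=-1)]}
compose_epochs(base, extra, base_end_epoch=52)
print(base["stage_0"][0])   # Modifier(start_epoch=0.0, end_epoch=52)
print(extra["stage_1"][0])  # Modifier(start_epoch=52.0, end_epoch=-1)
```

Before this fix, a base modifier's `end_epoch = -1` was never resolved to a concrete epoch, so composing staged recipes failed when any modifier relied on a dynamically inferred end epoch.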

tests/sparseml/pytorch/optim/test_manager.py
Lines changed: 92 additions & 0 deletions

@@ -417,6 +417,88 @@
 
 """ # noqa: W293
 
+RECIPE_END_EPOCH_IMPLICIT = """
+training_modifiers:
+    - !EpochRangeModifier
+        start_epoch: 0.0
+        end_epoch: 52
+
+    - !SetLearningRateModifier
+        start_epoch: 50
+        learning_rate: 0.000002
+
+pruning_modifiers:
+    - !ConstantPruningModifier
+        start_epoch: 0.0
+        params: __ALL_PRUNABLE__
+
+quantization_modifiers:
+    - !QuantizationModifier
+        start_epoch: 50
+        submodules: ['model.0']
+"""
+
+COMPOSED_RECIPE_END_EPOCH_IMPLICIT = """version: 1.1.0
+
+stage_0:
+    __metadata__: None
+
+    stage_0_modifiers:
+        - !ConstantPruningModifier
+            end_epoch: 52
+            params: __ALL_PRUNABLE__
+            start_epoch: 0.0
+            update_frequency: -1
+
+        - !EpochRangeModifier
+            end_epoch: 52
+            start_epoch: 0.0
+
+        - !QuantizationModifier
+            end_epoch: 52
+            quantize_embeddings: True
+            quantize_linear_activations: True
+            reduce_range: False
+            start_epoch: 50
+            submodules: ['model.0']
+
+        - !SetLearningRateModifier
+            constant_logging: False
+            end_epoch: 52
+            learning_rate: 2e-06
+            start_epoch: 50
+
+
+stage_1:
+    __metadata__: None
+
+    stage_1_modifiers:
+        - !EpochRangeModifier
+            end_epoch: 104
+            start_epoch: 52.0
+
+        - !ConstantPruningModifier
+            end_epoch: -1.0
+            params: __ALL_PRUNABLE__
+            start_epoch: 52.0
+            update_frequency: -1
+
+        - !QuantizationModifier
+            end_epoch: -1.0
+            quantize_embeddings: True
+            quantize_linear_activations: True
+            reduce_range: False
+            start_epoch: 102
+            submodules: ['model.0']
+
+        - !SetLearningRateModifier
+            constant_logging: False
+            end_epoch: -1.0
+            learning_rate: 2e-06
+            start_epoch: 102
+
+
+""" # noqa: W293
 
 def _generate_fake_metadata(item1=("metadata", None), item2=("level", 1)):
     return {k: v for (k, v) in (item1, item2)}

@@ -558,6 +640,16 @@ def _generate_fake_metadata(item1=("metadata", None), item2=("level", 1)):
             False,
             True,
         ),
+        # Testing composing two recipes with modifiers containing
+        # implicit `end_epoch` attribution (i.e. `end_epoch = -1`)
+        (
+            RECIPE_END_EPOCH_IMPLICIT,
+            RECIPE_END_EPOCH_IMPLICIT,
+            None,
+            COMPOSED_RECIPE_END_EPOCH_IMPLICIT,
+            False,
+            False,
+        ),
     ],
 )
 def test_lifecycle_manager_staged(
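
The new parametrized case composes the implicit-end-epoch recipe with itself and checks the result against `COMPOSED_RECIPE_END_EPOCH_IMPLICIT`. A hypothetical sketch of the entry point under test follows; the call signature is inferred from the diff (`base_recipe`, `keep_original_epochs`) and from SparseML's `ScheduledModifierManager`, so treat it as an assumption rather than the verified API.

```python
# Sketch only: compose_staged's exact signature is inferred from the
# diff above, not verified against a SparseML release.
from sparseml.pytorch.optim import ScheduledModifierManager

# Compose the recipe with itself, as the new test case does. With
# keep_original_epochs=False, the base stage's end_epoch = -1 values
# are pinned to the base recipe's max_epochs (52 here), while the
# second stage keeps its implicit -1 values.
composed = ScheduledModifierManager.compose_staged(
    RECIPE_END_EPOCH_IMPLICIT,
    RECIPE_END_EPOCH_IMPLICIT,
    keep_original_epochs=False,
)
# expected to serialize to COMPOSED_RECIPE_END_EPOCH_IMPLICIT
print(str(composed))
```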
