
Commit ea776b5

Merge pull request #1609 from roboflow/feat/metrics-precision-recall
Feat/metrics precision recall
2 parents bda4003 + 3e8a88a commit ea776b5

10 files changed: +1359 −31 lines

docs/metrics/common_values.md

Lines changed: 20 additions & 0 deletions

@@ -0,0 +1,20 @@
+---
+comments: true
+status: new
+---
+
+# Common Values
+
+This page contains supplementary values, types and enums that metrics use.
+
+<div class="md-typeset">
+  <h2><a href="#supervision.metrics.core.MetricTarget">MetricTarget</a></h2>
+</div>
+
+:::supervision.metrics.core.MetricTarget
+
+<div class="md-typeset">
+  <h2><a href="#supervision.metrics.core.AveragingMethod">AveragingMethod</a></h2>
+</div>
+
+:::supervision.metrics.core.AveragingMethod
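For orientation, here is a minimal sketch of how these two enums are intended to be used, based on the `F1Score` constructor added later in this commit. The `supervision.metrics.core` import path follows the mkdocstrings references above; the `sv.Detections(...)` placeholders and the particular combination of options are illustrative only.

```python
import supervision as sv
from supervision.metrics import F1Score
from supervision.metrics.core import AveragingMethod, MetricTarget

predictions = sv.Detections(...)
targets = sv.Detections(...)

# Evaluate plain xyxy boxes and average per-class scores equally (macro averaging).
f1_metric = F1Score(
    metric_target=MetricTarget.BOXES,
    averaging_method=AveragingMethod.MACRO,
)
print(f1_metric.update(predictions, targets).compute())
```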

docs/metrics/precision.md

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+---
+comments: true
+status: new
+---
+
+# Precision
+
+<div class="md-typeset">
+  <h2><a href="#supervision.metrics.precision.Precision">Precision</a></h2>
+</div>
+
+:::supervision.metrics.precision.Precision
+
+<div class="md-typeset">
+  <h2><a href="#supervision.metrics.precision.PrecisionResult">PrecisionResult</a></h2>
+</div>
+
+:::supervision.metrics.precision.PrecisionResult
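A minimal usage sketch, assuming `Precision` follows the same `update()`/`compute()` pattern as the `F1Score` and `MeanAveragePrecision` classes documented later in this commit; the `sv.Detections(...)` placeholders are illustrative.

```python
import supervision as sv
from supervision.metrics import Precision

predictions = sv.Detections(...)
targets = sv.Detections(...)

precision_metric = Precision()
precision_result = precision_metric.update(predictions, targets).compute()
print(precision_result)
```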

docs/metrics/recall.md

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+---
+comments: true
+status: new
+---
+
+# Recall
+
+<div class="md-typeset">
+  <h2><a href="#supervision.metrics.recall.Recall">Recall</a></h2>
+</div>
+
+:::supervision.metrics.recall.Recall
+
+<div class="md-typeset">
+  <h2><a href="#supervision.metrics.recall.RecallResult">RecallResult</a></h2>
+</div>
+
+:::supervision.metrics.recall.RecallResult
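Recall is expected to follow the same pattern. A short sketch under that assumption, additionally passing lists of `Detections`, mirroring the `Union[Detections, List[Detections]]` signature that `F1Score.update()` uses later in this commit.

```python
import supervision as sv
from supervision.metrics import Recall

# Batched evaluation: lists of Detections are assumed to be accepted,
# mirroring the update() signature added elsewhere in this commit.
predictions = [sv.Detections(...), sv.Detections(...)]
targets = [sv.Detections(...), sv.Detections(...)]

recall_metric = Recall()
recall_result = recall_metric.update(predictions, targets).compute()
print(recall_result)
```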

mkdocs.yml

Lines changed: 3 additions & 0 deletions

@@ -66,7 +66,10 @@ nav:
       - Utils: datasets/utils.md
   - Metrics:
       - mAP: metrics/mean_average_precision.md
+      - Precision: metrics/precision.md
+      - Recall: metrics/recall.md
       - F1 Score: metrics/f1_score.md
+      - Common Values: metrics/common_values.md
       - Legacy Metrics: detection/metrics.md
   - Utils:
       - Video: utils/video.md

supervision/metrics/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -8,6 +8,8 @@
     MeanAveragePrecision,
     MeanAveragePrecisionResult,
 )
+from supervision.metrics.precision import Precision, PrecisionResult
+from supervision.metrics.recall import Recall, RecallResult
 from supervision.metrics.utils.object_size import (
     ObjectSizeCategory,
     get_detection_size_category,

supervision/metrics/core.py

Lines changed: 14 additions & 12 deletions

@@ -37,9 +37,10 @@ class MetricTarget(Enum):
     """
     Specifies what type of detection is used to compute the metric.

-    * BOXES: xyxy bounding boxes
-    * MASKS: Binary masks
-    * ORIENTED_BOUNDING_BOXES: Oriented bounding boxes (OBB)
+    Attributes:
+        BOXES: xyxy bounding boxes
+        MASKS: Binary masks
+        ORIENTED_BOUNDING_BOXES: Oriented bounding boxes (OBB)
     """

     BOXES = "boxes"

@@ -54,15 +55,16 @@ class AveragingMethod(Enum):
     Suppose, before returning the final result, a metric is computed for each class.
     How do you combine those to get the final number?

-    * MACRO: Calculate the metric for each class and average the results. The simplest
-        averaging method, but it does not take class imbalance into account.
-    * MICRO: Calculate the metric globally by counting the total true positives, false
-        positives, and false negatives. Micro averaging is useful when you want to give
-        more importance to classes with more samples. It's also more appropriate if you
-        have an imbalance in the number of instances per class.
-    * WEIGHTED: Calculate the metric for each class and average the results, weighted by
-        the number of true instances of each class. Use weighted averaging if you want
-        to take class imbalance into account.
+    Attributes:
+        MACRO: Calculate the metric for each class and average the results. The simplest
+            averaging method, but it does not take class imbalance into account.
+        MICRO: Calculate the metric globally by counting the total true positives, false
+            positives, and false negatives. Micro averaging is useful when you want to
+            give more importance to classes with more samples. It's also more
+            appropriate if you have an imbalance in the number of instances per class.
+        WEIGHTED: Calculate the metric for each class and average the results, weighted
+            by the number of true instances of each class. Use weighted averaging if
+            you want to take class imbalance into account.
     """

     MACRO = "macro"
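The distinction between the three averaging methods described in the docstring above can be made concrete with a small self-contained calculation; the numbers below are toy values chosen for illustration, not library code.

```python
# Toy numbers, not library code: per-class precision under class imbalance.
tp = {"car": 90, "dog": 1}            # true positives per class
fp = {"car": 10, "dog": 9}            # false positives per class
support = {"car": 120, "dog": 15}     # ground-truth instances per class

per_class = {c: tp[c] / (tp[c] + fp[c]) for c in tp}   # car: 0.90, dog: 0.10

# MACRO: every class counts equally, regardless of size.
macro = sum(per_class.values()) / len(per_class)                          # 0.50

# MICRO: pool the counts first, then compute the metric once.
micro = sum(tp.values()) / (sum(tp.values()) + sum(fp.values()))          # 91 / 110 ≈ 0.83

# WEIGHTED: average per-class values, weighted by ground-truth support.
weighted = sum(per_class[c] * support[c] for c in per_class) / sum(support.values())
# (0.90 * 120 + 0.10 * 15) / 135 ≈ 0.81

print(macro, micro, weighted)
```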

supervision/metrics/f1_score.py

Lines changed: 54 additions & 1 deletion

@@ -23,11 +23,45 @@


 class F1Score(Metric):
+    """
+    F1 Score is a metric used to evaluate object detection models. It is the harmonic
+    mean of precision and recall, calculated at different IoU thresholds.
+
+    In simple terms, F1 Score is a measure of a model's balance between precision and
+    recall (accuracy and completeness), calculated as:
+
+    `F1 = 2 * (precision * recall) / (precision + recall)`
+
+    Example:
+        ```python
+        import supervision as sv
+        from supervision.metrics import F1Score
+
+        predictions = sv.Detections(...)
+        targets = sv.Detections(...)
+
+        f1_metric = F1Score()
+        f1_result = f1_metric.update(predictions, targets).compute()
+
+        print(f1_result)
+        print(f1_result.f1_50)
+        print(f1_result.small_objects.f1_50)
+        ```
+    """
+
     def __init__(
         self,
         metric_target: MetricTarget = MetricTarget.BOXES,
         averaging_method: AveragingMethod = AveragingMethod.WEIGHTED,
     ):
+        """
+        Initialize the F1Score metric.
+
+        Args:
+            metric_target (MetricTarget): The type of detection data to use.
+            averaging_method (AveragingMethod): The averaging method used to compute the
+                F1 scores. Determines how the F1 scores are aggregated across classes.
+        """
         self._metric_target = metric_target
         if self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
             raise NotImplementedError(

@@ -40,6 +74,9 @@ def __init__(
         self._targets_list: List[Detections] = []

     def reset(self) -> None:
+        """
+        Reset the metric to its initial state, clearing all stored data.
+        """
         self._predictions_list = []
         self._targets_list = []


@@ -48,6 +85,16 @@ def update(
         predictions: Union[Detections, List[Detections]],
         targets: Union[Detections, List[Detections]],
     ) -> F1Score:
+        """
+        Add new predictions and targets to the metric, but do not compute the result.
+
+        Args:
+            predictions (Union[Detections, List[Detections]]): The predicted detections.
+            targets (Union[Detections, List[Detections]]): The target detections.
+
+        Returns:
+            (F1Score): The updated metric instance.
+        """
         if not isinstance(predictions, list):
             predictions = [predictions]
         if not isinstance(targets, list):

@@ -65,6 +112,13 @@ def update(
         return self

     def compute(self) -> F1ScoreResult:
+        """
+        Calculate the F1 score metric based on the stored predictions and ground-truth
+        data, at different IoU thresholds.
+
+        Returns:
+            (F1ScoreResult): The F1 score metric result.
+        """
         result = self._compute(self._predictions_list, self._targets_list)

         small_predictions, small_targets = self._filter_predictions_and_targets_by_size(

@@ -373,7 +427,6 @@ class F1ScoreResult:
     The results of the F1 score metric calculation.

     Defaults to `0` if no detections or targets were provided.
-    Provides a custom `__str__` method for pretty printing.

     Attributes:
         metric_target (MetricTarget): the type of data used for the metric -
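A quick worked example of the formula quoted in the new class docstring; this is plain arithmetic, independent of the library.

```python
# Worked example of F1 = 2 * (precision * recall) / (precision + recall).
precision = 0.8
recall = 0.6
f1 = 2 * (precision * recall) / (precision + recall)
print(f1)  # 0.96 / 1.4 ≈ 0.686
```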

supervision/metrics/mean_average_precision.py

Lines changed: 26 additions & 18 deletions

@@ -23,6 +23,27 @@


 class MeanAveragePrecision(Metric):
+    """
+    Mean Average Precision (mAP) is a metric used to evaluate object detection models.
+    It is the average of the precision-recall curves at different IoU thresholds.
+
+    Example:
+        ```python
+        import supervision as sv
+        from supervision.metrics import MeanAveragePrecision
+
+        predictions = sv.Detections(...)
+        targets = sv.Detections(...)
+
+        map_metric = MeanAveragePrecision()
+        map_result = map_metric.update(predictions, targets).compute()
+
+        print(map_result)
+        print(map_result.map50_95)
+        map_result.plot()
+        ```
+    """
+
     def __init__(
         self,
         metric_target: MetricTarget = MetricTarget.BOXES,

@@ -47,6 +68,9 @@ def __init__(
         self._targets_list: List[Detections] = []

     def reset(self) -> None:
+        """
+        Reset the metric to its initial state, clearing all stored data.
+        """
         self._predictions_list = []
         self._targets_list = []


@@ -95,26 +119,10 @@
     ) -> MeanAveragePrecisionResult:
         """
         Calculate Mean Average Precision based on predicted and ground-truth
-            detections at different thresholds.
+        detections at different thresholds.

         Returns:
-            (MeanAveragePrecisionResult): New instance of MeanAveragePrecision.
-
-        Example:
-            ```python
-            import supervision as sv
-            from supervision.metrics import MeanAveragePrecision
-
-            predictions = sv.Detections(...)
-            targets = sv.Detections(...)
-
-            map_metric = MeanAveragePrecision()
-            map_result = map_metric.update(predictions, targets).compute()
-
-            print(map_result)
-            print(map_result.map50_95)
-            map_result.plot()
-            ```
+            (MeanAveragePrecisionResult): The Mean Average Precision result.
         """
         result = self._compute(self._predictions_list, self._targets_list)

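For readers unfamiliar with the metric, the sketch below shows one common way to compute average precision for a single class at a single IoU threshold once detections have been matched to ground truth; mAP then averages such values over classes and over IoU thresholds (typically 0.50 to 0.95). This is a simplified, self-contained reference calculation, not the library's internal implementation.

```python
import numpy as np

def average_precision(scores: np.ndarray, is_tp: np.ndarray, n_gt: int) -> float:
    """AP for one class at one IoU threshold, given already-matched detections."""
    order = np.argsort(-scores)                    # rank detections by confidence
    tp = np.cumsum(is_tp[order])                   # cumulative true positives
    fp = np.cumsum(~is_tp[order])                  # cumulative false positives
    recall = tp / n_gt
    precision = tp / (tp + fp)

    # Replace the precision curve with its monotonically decreasing envelope,
    # then integrate it over recall (all-point interpolation).
    precision = np.maximum.accumulate(precision[::-1])[::-1]
    recall_steps = np.diff(np.concatenate(([0.0], recall)))
    return float(np.sum(recall_steps * precision))

# Example: 4 detections sorted by confidence, 3 ground-truth boxes.
scores = np.array([0.9, 0.8, 0.7, 0.6])
is_tp = np.array([True, False, True, True])
print(average_precision(scores, is_tp, n_gt=3))    # ≈ 0.83
```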