Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion docs/en/advanced_guides/codecs.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,23 @@ def encode(self,
return encoded
```

The encoded data is converted to Tensor format in `PackPoseInputs` and packed in `data_sample.gt_instance_labels` for model calls, which is generally used for loss calculation, as demonstrated by `loss()` in `RegressionHead`.
The encoded data is converted to Tensor format in `PackPoseInputs` and packed in `data_sample.gt_instance_labels` for model calls. By default, it contains the following encoded fields:

- `keypoint_labels`
- `keypoint_weights`
- `keypoints_visible_weights`

To specify data fields to be packed, you can define the `label_mapping_table` attribute in the codec. For example, in `VideoPoseLifting`:

```Python
label_mapping_table = dict(
trajectory_weights='trajectory_weights',
lifting_target_label='lifting_target_label',
lifting_target_weight='lifting_target_weight',
)
```

`data_sample.gt_instance_labels` are generally used for loss calculation, as demonstrated by `loss()` in `RegressionHead`.

```Python
def loss(self,
Expand All @@ -88,6 +104,10 @@ def loss(self,
### Omitted ###
```

```{note}
The encoder also defines the data to be packed into `data_sample.gt_instances` and `data_sample.gt_fields`. Modifying `instance_mapping_table` and `field_mapping_table` in the codec specifies the values to be packed, respectively. For the default values, please check [BaseKeypointCodec](https://github.com/open-mmlab/mmpose/blob/main/mmpose/codecs/base.py).
```

### Decoder

The decoder transforms the model outputs into coordinates in the input image space, which is the opposite processing of the encoder.
Expand Down
22 changes: 21 additions & 1 deletion docs/zh_cn/advanced_guides/codecs.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,23 @@ def encode(self,
return encoded
```

编码后的数据会在 `PackPoseInputs` 中被转换为 Tensor 格式,并封装到 `data_sample.gt_instance_labels` 中供模型调用,一般主要用于 loss 计算,下面以 `RegressionHead` 中的 `loss()` 为例:
编码后的数据会在 `PackPoseInputs` 中被转换为 Tensor 格式,并封装到 `data_sample.gt_instance_labels` 中供模型调用,默认包含以下的字段:

- `keypoint_labels`
- `keypoint_weights`
- `keypoints_visible_weights`

如要指定要打包的数据字段,可以在编解码器中定义 `label_mapping_table` 属性。例如,在 `VideoPoseLifting` 中:

```Python
label_mapping_table = dict(
trajectory_weights='trajectory_weights',
lifting_target_label='lifting_target_label',
lifting_target_weight='lifting_target_weight',
)
```

`data_sample.gt_instance_labels` 一般主要用于 loss 计算,下面以 `RegressionHead` 中的 `loss()` 为例:

```Python
def loss(self,
Expand All @@ -88,6 +104,10 @@ def loss(self,
### 后续内容省略 ###
```

```{note}
编码器亦会定义封装在 `data_sample.gt_instances` 和 `data_sample.gt_fields` 中的字段。修改编解码器中的 `instance_mapping_table` 和 `field_mapping_table` 的值将分别指定封装的字段,其中默认值定义在 [BaseKeypointCodec](https://github.com/open-mmlab/mmpose/blob/main/mmpose/codecs/base.py) 中。
```

### 解码器

解码器主要负责将模型的输出解码为输入图片尺度的坐标值,处理过程与编码器相反。
Expand Down
4 changes: 4 additions & 0 deletions mmpose/codecs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ class BaseKeypointCodec(metaclass=ABCMeta):
# mandatory `keypoints` and `keypoints_visible` arguments.
auxiliary_encode_keys = set()

field_mapping_table = dict()
instance_mapping_table = dict()
label_mapping_table = dict()

@abstractmethod
def encode(self,
keypoints: np.ndarray,
Expand Down
9 changes: 9 additions & 0 deletions mmpose/codecs/image_pose_lifting.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ class ImagePoseLifting(BaseKeypointCodec):

auxiliary_encode_keys = {'lifting_target', 'lifting_target_visible'}

instance_mapping_table = dict(
lifting_target='lifting_target',
lifting_target_visible='lifting_target_visible',
)
label_mapping_table = dict(
trajectory_weights='trajectory_weights',
lifting_target_label='lifting_target_label',
lifting_target_weight='lifting_target_weight')

def __init__(self,
num_keypoints: int,
root_index: int,
Expand Down
9 changes: 9 additions & 0 deletions mmpose/codecs/motionbert_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ class MotionBERTLabel(BaseKeypointCodec):
'lifting_target', 'lifting_target_visible', 'camera_param', 'factor'
}

instance_mapping_table = dict(
lifting_target='lifting_target',
lifting_target_visible='lifting_target_visible',
)
label_mapping_table = dict(
trajectory_weights='trajectory_weights',
lifting_target_label='lifting_target_label',
lifting_target_weight='lifting_target_weight')

def __init__(self,
num_keypoints: int,
root_index: int = 0,
Expand Down
9 changes: 9 additions & 0 deletions mmpose/codecs/video_pose_lifting.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ class VideoPoseLifting(BaseKeypointCodec):
'lifting_target', 'lifting_target_visible', 'camera_param'
}

instance_mapping_table = dict(
lifting_target='lifting_target',
lifting_target_visible='lifting_target_visible',
)
label_mapping_table = dict(
trajectory_weights='trajectory_weights',
lifting_target_label='lifting_target_label',
lifting_target_weight='lifting_target_weight')

def __init__(self,
num_keypoints: int,
zero_center: bool = True,
Expand Down
30 changes: 25 additions & 5 deletions mmpose/datasets/transforms/common_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,18 +974,38 @@ def transform(self, results: Dict) -> Optional[dict]:
keypoints_visible=keypoints_visible,
**auxiliary_encode_kwargs)

if self.encoder.field_mapping_table:
encoded[
'field_mapping_table'] = self.encoder.field_mapping_table
if self.encoder.instance_mapping_table:
encoded['instance_mapping_table'] = \
self.encoder.instance_mapping_table
if self.encoder.label_mapping_table:
encoded[
'label_mapping_table'] = self.encoder.label_mapping_table

else:
encoded_list = []
for _encoder in self.encoder:
auxiliary_encode_kwargs = {
key: results[key]
for key in _encoder.auxiliary_encode_keys
}
encoded_list.append(
_encoder.encode(
keypoints=keypoints,
keypoints_visible=keypoints_visible,
**auxiliary_encode_kwargs))
encoded = _encoder.encode(
keypoints=keypoints,
keypoints_visible=keypoints_visible,
**auxiliary_encode_kwargs)

if _encoder.field_mapping_table:
encoded['field_mapping_table'] = \
_encoder.field_mapping_table
if _encoder.instance_mapping_table:
encoded['instance_mapping_table'] = \
_encoder.instance_mapping_table
if _encoder.label_mapping_table:
encoded['label_mapping_table'] = \
_encoder.label_mapping_table
encoded_list.append(encoded)

if self.multilevel:
# For multilevel encoding, the encoded items from each encoder
Expand Down
76 changes: 29 additions & 47 deletions mmpose/datasets/transforms/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,44 +104,30 @@ class PackPoseInputs(BaseTransform):

# items in `instance_mapping_table` will be directly packed into
# PoseDataSample.gt_instances without converting to Tensor
instance_mapping_table = {
'bbox': 'bboxes',
'head_size': 'head_size',
'bbox_center': 'bbox_centers',
'bbox_scale': 'bbox_scales',
'bbox_score': 'bbox_scores',
'keypoints': 'keypoints',
'keypoints_visible': 'keypoints_visible',
'lifting_target': 'lifting_target',
'lifting_target_visible': 'lifting_target_visible',
}

# items in `label_mapping_table` will be packed into
# PoseDataSample.gt_instance_labels and converted to Tensor. These items
# will be used for computing losses
label_mapping_table = {
'keypoint_labels': 'keypoint_labels',
'lifting_target_label': 'lifting_target_label',
'lifting_target_weight': 'lifting_target_weight',
'trajectory_weights': 'trajectory_weights',
'keypoint_x_labels': 'keypoint_x_labels',
'keypoint_y_labels': 'keypoint_y_labels',
'keypoint_weights': 'keypoint_weights',
'instance_coords': 'instance_coords',
'keypoints_visible_weights': 'keypoints_visible_weights'
}
instance_mapping_table = dict(
bbox='bboxes',
bbox_score='bbox_scores',
keypoints='keypoints',
keypoints_visible='keypoints_visible')

# items in `field_mapping_table` will be packed into
# PoseDataSample.gt_fields and converted to Tensor. These items will be
# used for computing losses
field_mapping_table = {
'heatmaps': 'heatmaps',
'instance_heatmaps': 'instance_heatmaps',
'heatmap_mask': 'heatmap_mask',
'heatmap_weights': 'heatmap_weights',
'displacements': 'displacements',
'displacement_weights': 'displacement_weights',
}
field_mapping_table = dict(
heatmaps='heatmaps',
instance_heatmaps='instance_heatmaps',
heatmap_mask='heatmap_mask',
heatmap_weights='heatmap_weights',
displacements='displacements',
displacement_weights='displacement_weights')

# items in `label_mapping_table` will be packed into
# PoseDataSample.gt_instance_labels and converted to Tensor. These items
# will be used for computing losses
label_mapping_table = dict(
keypoint_labels='keypoint_labels',
keypoint_weights='keypoint_weights',
keypoints_visible_weights='keypoints_visible_weights')

def __init__(self,
meta_keys=('id', 'img_id', 'img_path', 'category_id',
Expand Down Expand Up @@ -182,12 +168,10 @@ def transform(self, results: dict) -> dict:

# pack instance data
gt_instances = InstanceData()
for key, packed_key in self.instance_mapping_table.items():
_instance_mapping_table = results.get('instance_mapping_table',
self.instance_mapping_table)
for key, packed_key in _instance_mapping_table.items():
if key in results:
if 'lifting_target' in results and key in {
'keypoints', 'keypoints_visible'
}:
continue
gt_instances.set_field(results[key], packed_key)

# pack `transformed_keypoints` for visualizing data transform
Expand All @@ -200,14 +184,10 @@ def transform(self, results: dict) -> dict:

# pack instance labels
gt_instance_labels = InstanceData()
for key, packed_key in self.label_mapping_table.items():
_label_mapping_table = results.get('label_mapping_table',
self.label_mapping_table)
for key, packed_key in _label_mapping_table.items():
if key in results:
# For pose-lifting, store only target-related fields
if 'lifting_target' in results and packed_key in {
'keypoint_labels', 'keypoint_weights',
'keypoints_visible'
}:
continue
if isinstance(results[key], list):
# A list of labels is usually generated by combined
# multiple encoders (See ``GenerateTarget`` in
Expand All @@ -222,7 +202,9 @@ def transform(self, results: dict) -> dict:

# pack fields
gt_fields = None
for key, packed_key in self.field_mapping_table.items():
_field_mapping_table = results.get('field_mapping_table',
self.field_mapping_table)
for key, packed_key in _field_mapping_table.items():
if key in results:
if isinstance(results[key], list):
if gt_fields is None:
Expand Down
6 changes: 3 additions & 3 deletions mmpose/datasets/transforms/pose3d_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ def transform(self, results: Dict) -> dict:
lifting_target, lifting_target_visible, flip_indices,
**self.target_flip_cfg)

results[keypoints_key] = keypoints
results[keypoints_visible_key] = keypoints_visible
results[target_key] = lifting_target
results[keypoints_key] = keypoints
results[keypoints_visible_key] = keypoints_visible
results[target_key] = lifting_target
results['lifting_target_visible'] = lifting_target_visible

# flip horizontal distortion coefficients
Expand Down