Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into vs/upd_mapi
Browse files Browse the repository at this point in the history
  • Loading branch information
sovrasov committed Aug 12, 2024
2 parents 653acaa + d66eaf7 commit 10a557b
Show file tree
Hide file tree
Showing 51 changed files with 824 additions and 728 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ All notable changes to this project will be documented in this file.
(<https://github.com/openvinotoolkit/training_extensions/pull/3759>)
- Enable polygon and bitmap masks as prompt inputs for zero-shot learning
(<https://github.com/openvinotoolkit/training_extensions/pull/3769>)
- Refactor `ConvModule` by removing `conv_cfg` and `act_cfg`
(<https://github.com/openvinotoolkit/training_extensions/pull/3783>, <https://github.com/openvinotoolkit/training_extensions/pull/3809>)

### Bug fixes

Expand Down
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ This is the source code for the OTX documentation. It is built using sphinx-desi
To install the dependencies, run the following command:

```bash
otx install --option docs
pip install otx[docs]
```

## Build
Expand Down
47 changes: 24 additions & 23 deletions src/otx/algo/action_classification/backbones/x3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
from __future__ import annotations

import math
from typing import Callable

import torch.utils.checkpoint as cp
from torch import Tensor, nn
from torch.nn.modules.batchnorm import _BatchNorm

from otx.algo.modules.activation import Swish, build_activation_layer
from otx.algo.modules.activation import Swish
from otx.algo.modules.conv_module import Conv3dModule
from otx.algo.utils.mmengine_utils import load_checkpoint
from otx.algo.utils.weight_init import constant_init, kaiming_init
Expand Down Expand Up @@ -73,8 +74,8 @@ class BlockX3D(nn.Module):
before and after the 3x3x3 conv. Default: True.
norm_cfg (dict): Config for norm layers. required keys are ``type``,
Default: ``dict(type='BN3d')``.
act_cfg (dict): Config dict for activation layer.
Default: ``dict(type='ReLU')``.
activation_callable (Callable[..., nn.Module] | None): Activation layer module.
Defaults to `nn.ReLU`.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. Default: False.
"""
Expand All @@ -89,7 +90,7 @@ def __init__(
se_ratio: float | None = None,
use_swish: bool = True,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
activation_callable: Callable[..., nn.Module] | None = nn.ReLU,
with_cp: bool = False,
):
super().__init__()
Expand All @@ -102,8 +103,7 @@ def __init__(
self.se_ratio = se_ratio
self.use_swish = use_swish
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.act_cfg_swish = Swish()
self.activation_callable = activation_callable
self.with_cp = with_cp

self.conv1 = Conv3dModule(
Expand All @@ -114,7 +114,7 @@ def __init__(
padding=0,
bias=False,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
activation_callable=self.activation_callable,
)
# Here we use the channel-wise conv
self.conv2 = Conv3dModule(
Expand All @@ -126,7 +126,7 @@ def __init__(
groups=planes,
bias=False,
norm_cfg=self.norm_cfg,
act_cfg=None,
activation_callable=None,
)

self.swish = Swish()
Expand All @@ -139,13 +139,13 @@ def __init__(
padding=0,
bias=False,
norm_cfg=self.norm_cfg,
act_cfg=None,
activation_callable=None,
)

if self.se_ratio is not None:
self.se_module = SEModule(planes, self.se_ratio)

self.relu = build_activation_layer(self.act_cfg) if self.act_cfg else build_activation_layer({})
self.relu = self.activation_callable() if self.activation_callable else nn.ReLU(inplace=True)

def forward(self, x: Tensor) -> Tensor:
"""Defines the computation performed at every call."""
Expand Down Expand Up @@ -198,8 +198,8 @@ class X3DBackbone(nn.Module):
norm_cfg (dict): Config for norm layers. required keys are ``type`` and
``requires_grad``.
Default: ``dict(type='BN3d', requires_grad=True)``.
act_cfg (dict): Config dict for activation layer.
Default: ``dict(type='ReLU', inplace=True)``.
activation_callable (Callable[..., nn.Module] | None): Activation layer module.
Defaults to `nn.ReLU`.
norm_eval (bool): Whether to set BN layers to eval mode, namely, freeze
running stats (mean and var). Default: False.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
Expand All @@ -224,7 +224,7 @@ def __init__(
se_ratio: float = 1 / 16,
use_swish: bool = True,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
activation_callable: Callable[..., nn.Module] | None = nn.ReLU,
norm_eval: bool = False,
with_cp: bool = False,
zero_init_residual: bool = True,
Expand Down Expand Up @@ -267,7 +267,7 @@ def __init__(
self.use_swish = use_swish

self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.activation_callable = activation_callable
self.norm_eval = norm_eval
self.with_cp = with_cp
self.zero_init_residual = zero_init_residual
Expand All @@ -294,7 +294,7 @@ def __init__(
se_ratio=self.se_ratio,
use_swish=self.use_swish,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
activation_callable=self.activation_callable,
with_cp=with_cp,
**kwargs,
)
Expand All @@ -312,7 +312,7 @@ def __init__(
padding=0,
bias=False,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
activation_callable=self.activation_callable,
)
self.feat_dim = int(self.feat_dim * self.gamma_b)

Expand Down Expand Up @@ -350,7 +350,7 @@ def make_res_layer(
se_ratio: float | None = None,
use_swish: bool = True,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
activation_callable: Callable[..., nn.Module] | None = nn.ReLU,
with_cp: bool = False,
**kwargs,
) -> nn.Module:
Expand All @@ -376,7 +376,8 @@ def make_res_layer(
use_swish (bool): Whether to use swish as the activation function
before and after the 3x3x3 conv. Default: True.
norm_cfg (dict | None): Config for norm layers. Default: None.
act_cfg (dict | None): Config for activate layers. Default: None.
activation_callable (Callable[..., nn.Module] | None): Activation layer module.
Defaults to `nn.ReLU`.
with_cp (bool | None): Use checkpoint or not. Using checkpoint
will save some memory while slowing down the training speed.
Default: False.
Expand All @@ -394,7 +395,7 @@ def make_res_layer(
padding=0,
bias=False,
norm_cfg=norm_cfg,
act_cfg=None,
activation_callable=None,
)

use_se = [False] * blocks
Expand All @@ -416,7 +417,7 @@ def make_res_layer(
se_ratio=se_ratio if use_se[0] else None,
use_swish=use_swish,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
activation_callable=activation_callable,
with_cp=with_cp,
**kwargs,
),
Expand All @@ -432,7 +433,7 @@ def make_res_layer(
se_ratio=se_ratio if use_se[i] else None,
use_swish=use_swish,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
activation_callable=activation_callable,
with_cp=with_cp,
**kwargs,
),
Expand All @@ -450,7 +451,7 @@ def _make_stem_layer(self) -> None:
padding=(0, 1, 1),
bias=False,
norm_cfg=None,
act_cfg=None,
activation_callable=None,
)
self.conv1_t = Conv3dModule(
self.base_channels,
Expand All @@ -461,7 +462,7 @@ def _make_stem_layer(self) -> None:
groups=self.base_channels,
bias=False,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
activation_callable=self.activation_callable,
)

def _freeze_stages(self) -> None:
Expand Down
3 changes: 2 additions & 1 deletion src/otx/algo/action_classification/x3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from __future__ import annotations

from functools import partial
from typing import TYPE_CHECKING

from torch import nn
Expand Down Expand Up @@ -65,7 +66,7 @@ def _build_model(self, num_classes: int) -> nn.Module:
gamma_d=2.2,
gamma_w=1,
norm_cfg={"type": "BN3d", "requires_grad": True},
act_cfg={"type": "ReLU", "inplace": True},
activation_callable=partial(nn.ReLU, inplace=True),
),
cls_head=X3DHead(
num_classes=num_classes,
Expand Down
Loading

0 comments on commit 10a557b

Please sign in to comment.