diff --git a/configs/dgcnn/_base_/default_runtime.py b/configs/dgcnn/_base_/default_runtime.py
new file mode 100644
index 0000000000..c52273b582
--- /dev/null
+++ b/configs/dgcnn/_base_/default_runtime.py
@@ -0,0 +1,35 @@
+from mmengine.hooks.checkpoint_hook import CheckpointHook
+from mmengine.hooks.iter_timer_hook import IterTimerHook
+from mmengine.hooks.logger_hook import LoggerHook
+from mmengine.hooks.param_scheduler_hook import ParamSchedulerHook
+from mmengine.hooks.runtime_info_hook import RuntimeInfoHook
+from mmengine.hooks.sampler_seed_hook import DistSamplerSeedHook
+from mmengine.hooks.sync_buffer_hook import SyncBuffersHook
+from mmengine.runner.log_processor import LogProcessor
+
+from mmaction.visualization.action_visualizer import ActionVisualizer
+from mmaction.visualization.video_backend import LocalVisBackend
+
+# hooks
+default_hooks = dict(
+    runtime_info=dict(type=RuntimeInfoHook),
+    timer=dict(type=IterTimerHook),
+    logger=dict(type=LoggerHook, interval=20, ignore_last=False),
+    param_scheduler=dict(type=ParamSchedulerHook),
+    checkpoint=dict(type=CheckpointHook, interval=1, save_best='auto'),
+    sampler_seed=dict(type=DistSamplerSeedHook),
+    sync_buffers=dict(type=SyncBuffersHook))
+
+env_cfg = dict(
+    cudnn_benchmark=False,
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    dist_cfg=dict(backend='nccl'))
+
+log_processor = dict(type=LogProcessor, window_size=20, by_epoch=True)
+
+vis_backends = [dict(type=LocalVisBackend)]
+visualizer = dict(type=ActionVisualizer, vis_backends=vis_backends)
+
+log_level = 'INFO'
+load_from = None
+resume = False
diff --git a/configs/dgcnn/_base_/models/slowfast_r50.py b/configs/dgcnn/_base_/models/slowfast_r50.py
new file mode 100644
index 0000000000..9cba8d5223
--- /dev/null
+++ b/configs/dgcnn/_base_/models/slowfast_r50.py
@@ -0,0 +1,48 @@
+from mmaction.models.backbones.resnet3d_slowfast import ResNet3dSlowFast
+from mmaction.models.data_preprocessors.data_preprocessor import \
+    ActionDataPreprocessor
+from mmaction.models.heads.slowfast_head import SlowFastHead
+from mmaction.models.recognizers.recognizer3d import Recognizer3D
+
+# model settings
+model = dict(
+    type=Recognizer3D,
+    backbone=dict(
+        type=ResNet3dSlowFast,
+        pretrained=None,
+        resample_rate=8,  # tau
+        speed_ratio=8,  # alpha
+        channel_ratio=8,  # beta_inv
+        slow_pathway=dict(
+            type='resnet3d',
+            depth=50,
+            pretrained=None,
+            lateral=True,
+            conv1_kernel=(1, 7, 7),
+            dilations=(1, 1, 1, 1),
+            conv1_stride_t=1,
+            pool1_stride_t=1,
+            inflate=(0, 0, 1, 1),
+            norm_eval=False),
+        fast_pathway=dict(
+            type='resnet3d',
+            depth=50,
+            pretrained=None,
+            lateral=False,
+            base_channels=8,
+            conv1_kernel=(5, 7, 7),
+            conv1_stride_t=1,
+            pool1_stride_t=1,
+            norm_eval=False)),
+    cls_head=dict(
+        type=SlowFastHead,
+        in_channels=2304,  # 2048+256
+        num_classes=400,
+        spatial_type='avg',
+        dropout_ratio=0.5,
+        average_clips='prob'),
+    data_preprocessor=dict(
+        type=ActionDataPreprocessor,
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        format_shape='NCTHW'))
diff --git a/configs/dgcnn/recognition/slowfast/slowfast_r101_8xb8_8x8x1_256e_kinetics400_rgb.py b/configs/dgcnn/recognition/slowfast/slowfast_r101_8xb8_8x8x1_256e_kinetics400_rgb.py
new file mode 100644
index 0000000000..40068b7818
--- /dev/null
+++ b/configs/dgcnn/recognition/slowfast/slowfast_r101_8xb8_8x8x1_256e_kinetics400_rgb.py
@@ -0,0 +1,11 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .slowfast_r50_8xb8_8x8x1_256e_kinetics400_rgb import *
+
+# Merge into the inherited ``model``: a plain ``model = dict(...)`` would
+# rebind the name and drop every base field that is not repeated here.
+model.update(
+    dict(
+        backbone=dict(
+            slow_pathway=dict(depth=101), fast_pathway=dict(depth=101))))
diff --git a/configs/dgcnn/recognition/slowfast/slowfast_r101_r50_32xb8_4x16x1_256e_kinetics400_rgb.py b/configs/dgcnn/recognition/slowfast/slowfast_r101_r50_32xb8_4x16x1_256e_kinetics400_rgb.py
new file mode 100644
index 0000000000..8018c79fe1
--- /dev/null
+++ b/configs/dgcnn/recognition/slowfast/slowfast_r101_r50_32xb8_4x16x1_256e_kinetics400_rgb.py
@@ -0,0 +1,11 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .slowfast_r50_8xb8_4x16x1_256e_kinetics400_rgb import *
+
+# Merge the overrides into the inherited dicts instead of rebinding the
+# names, so the remaining base fields are kept.
+model.update(dict(backbone=dict(slow_pathway=dict(depth=101))))
+
+# 4x the base lr of 0.1 (config name: 32xb8 vs. the 8xb8 base config).
+optim_wrapper.update(dict(optimizer=dict(lr=0.1 * 4)))
diff --git a/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8-8x8x1_steplr_256e_kinetics400_rgb.py b/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8-8x8x1_steplr_256e_kinetics400_rgb.py
new file mode 100644
index 0000000000..997964bc66
--- /dev/null
+++ b/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8-8x8x1_steplr_256e_kinetics400_rgb.py
@@ -0,0 +1,27 @@
+from mmengine.config import read_base
+from mmengine.optim import LinearLR, MultiStepLR
+
+with read_base():
+    from .slowfast_r50_8xb8_8x8x1_256e_kinetics400_rgb import *
+
+# Merge into the inherited ``model`` rather than rebinding it, so the base
+# recognizer, head and preprocessor settings are kept.
+model.update(dict(backbone=dict(slow_pathway=dict(lateral_norm=True))))
+
+# Replaces the inherited schedule: linear warmup plus multi-step decay.
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=34,
+        convert_to_iter_based=True),
+    dict(
+        type=MultiStepLR,
+        begin=0,
+        end=256,
+        by_epoch=True,
+        milestones=[94, 154, 196],
+        gamma=0.1)
+]
diff --git a/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8_4x16x1_256e_kinetics400_rgb.py b/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8_4x16x1_256e_kinetics400_rgb.py
new file mode 100644
index 0000000000..156eb5768c
--- /dev/null
+++ b/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8_4x16x1_256e_kinetics400_rgb.py
@@ -0,0 +1,133 @@
+from mmengine.config import read_base
+from mmengine.dataset import DefaultSampler
+from mmengine.optim import CosineAnnealingLR, LinearLR
+from mmengine.runner import EpochBasedTrainLoop, TestLoop, ValLoop
+from torch.optim import SGD
+
+from mmaction.datasets.transforms.formatting import (FormatShape,
+                                                     PackActionInputs)
+from mmaction.datasets.transforms.loading import (DecordDecode, DecordInit,
+                                                  SampleFrames)
+from mmaction.datasets.transforms.processing import (CenterCrop, Flip,
+                                                     RandomResizedCrop, Resize,
+                                                     ThreeCrop)
+from mmaction.datasets.video_dataset import VideoDataset
+from mmaction.evaluation.metrics.acc_metric import AccMetric
+
+with read_base():
+    from ..._base_.default_runtime import *
+    from ..._base_.models.slowfast_r50 import *
+
+dataset_type = VideoDataset
+data_root = 'data/kinetics400/videos_train'
+data_root_val = 'data/kinetics400/videos_val'
+ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
+ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
+
+file_client_args = dict(io_backend='disk')
+train_pipeline = [
+    dict(type=DecordInit, **file_client_args),
+    dict(type=SampleFrames, clip_len=32, frame_interval=2, num_clips=1),
+    dict(type=DecordDecode),
+    dict(type=Resize, scale=(-1, 256)),
+    dict(type=RandomResizedCrop),
+    dict(type=Resize, scale=(224, 224), keep_ratio=False),
+    dict(type=Flip, flip_ratio=0.5),
+    dict(type=FormatShape, input_format='NCTHW'),
+    dict(type=PackActionInputs)
+]
+val_pipeline = [
+    dict(type=DecordInit, **file_client_args),
+    dict(
+        type=SampleFrames,
+        clip_len=32,
+        frame_interval=2,
+        num_clips=1,
+        test_mode=True),
+    dict(type=DecordDecode),
+    dict(type=Resize, scale=(-1, 256)),
+    dict(type=CenterCrop, crop_size=224),
+    dict(type=FormatShape, input_format='NCTHW'),
+    dict(type=PackActionInputs)
+]
+test_pipeline = [
+    dict(type=DecordInit, **file_client_args),
+    dict(
+        type=SampleFrames,
+        clip_len=32,
+        frame_interval=2,
+        num_clips=10,
+        test_mode=True),
+    dict(type=DecordDecode),
+    dict(type=Resize, scale=(-1, 256)),
+    dict(type=ThreeCrop, crop_size=256),
+    dict(type=FormatShape, input_format='NCTHW'),
+    dict(type=PackActionInputs)
+]
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=dict(video=data_root),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=8,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=dict(video=data_root_val),
+        pipeline=val_pipeline,
+        test_mode=True))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        ann_file=ann_file_test,
+        data_prefix=dict(video=data_root_val),
+        pipeline=test_pipeline,
+        test_mode=True))
+
+val_evaluator = dict(type=AccMetric)
+test_evaluator = val_evaluator
+
+train_cfg = dict(
+    type=EpochBasedTrainLoop, max_epochs=256, val_begin=1, val_interval=5)
+val_cfg = dict(type=ValLoop)
+test_cfg = dict(type=TestLoop)
+
+optim_wrapper = dict(
+    optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=1e-4),
+    clip_grad=dict(max_norm=40, norm_type=2))
+
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=34,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        T_max=256,
+        eta_min=0,
+        by_epoch=True,
+        begin=0,
+        end=256)
+]
+
+default_hooks.update(
+    dict(
+        checkpoint=dict(interval=4, max_keep_ckpts=3),
+        logger=dict(interval=100)))
diff --git a/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8_8x8x1_256e_kinetics400_rgb.py b/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8_8x8x1_256e_kinetics400_rgb.py
new file mode 100644
index 0000000000..8135074d07
--- /dev/null
+++ b/configs/dgcnn/recognition/slowfast/slowfast_r50_8xb8_8x8x1_256e_kinetics400_rgb.py
@@ -0,0 +1,14 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .slowfast_r50_8xb8_4x16x1_256e_kinetics400_rgb import *
+
+# Merge into the inherited ``model``: a plain ``model = dict(...)`` would
+# rebind the name and drop every base field that is not repeated here.
+model.update(
+    dict(
+        backbone=dict(
+            resample_rate=4,  # tau
+            speed_ratio=4,  # alpha
+            channel_ratio=8,  # beta_inv
+            slow_pathway=dict(fusion_kernel=7))))