Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MMSIG] Add new configuration files for SlowFast #2753

Open
wants to merge 1 commit into
base: dev-1.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions configs/dgcnn/_base_/default_runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.hooks.iter_timer_hook import IterTimerHook
from mmengine.hooks.logger_hook import LoggerHook
from mmengine.hooks.param_scheduler_hook import ParamSchedulerHook
from mmengine.hooks.runtime_info_hook import RuntimeInfoHook
from mmengine.hooks.sampler_seed_hook import DistSamplerSeedHook
from mmengine.hooks.sync_buffer_hook import SyncBuffersHook
from mmengine.runner.log_processor import LogProcessor

from mmaction.visualization.action_visualizer import ActionVisualizer
from mmaction.visualization.video_backend import LocalVisBackend

# Hook configuration, keyed by hook slot name. Each entry gives the hook
# class via ``type`` plus any keyword arguments for that hook.
default_hooks = dict(
    runtime_info=dict(type=RuntimeInfoHook),
    timer=dict(type=IterTimerHook),
    logger=dict(
        type=LoggerHook,
        interval=20,
        ignore_last=False,
    ),
    param_scheduler=dict(type=ParamSchedulerHook),
    checkpoint=dict(
        type=CheckpointHook,
        interval=1,
        save_best='auto',
    ),
    sampler_seed=dict(type=DistSamplerSeedHook),
    sync_buffers=dict(type=SyncBuffersHook),
)

# Environment settings: cuDNN benchmark switch, multiprocessing options and
# the distributed communication backend.
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(
        mp_start_method='fork',
        opencv_num_threads=0,
    ),
    dist_cfg=dict(backend='nccl'),
)

# Log processing: epoch-based, with a window of 20.
log_processor = dict(
    type=LogProcessor,
    window_size=20,
    by_epoch=True,
)

# Visualization: a single local backend attached to the action visualizer.
vis_backends = [dict(type=LocalVisBackend)]
visualizer = dict(
    type=ActionVisualizer,
    vis_backends=vis_backends,
)

# General runtime switches.
log_level = 'INFO'
load_from = None
resume = False
48 changes: 48 additions & 0 deletions configs/dgcnn/_base_/models/slowfast_r50.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from mmaction.models.backbones.resnet3d_slowfast import ResNet3dSlowFast
from mmaction.models.data_preprocessors.data_preprocessor import \
ActionDataPreprocessor
from mmaction.models.heads.slowfast_head import SlowFastHead
from mmaction.models.recognizers.recognizer3d import Recognizer3D

# Model settings: a 3D recognizer with a two-pathway (slow/fast) ResNet-50
# backbone, a SlowFast classification head and an input preprocessor.
model = dict(
    type=Recognizer3D,
    backbone=dict(
        type=ResNet3dSlowFast,
        pretrained=None,
        resample_rate=8,  # tau
        speed_ratio=8,  # alpha
        channel_ratio=8,  # beta_inv
        # Slow pathway: lateral connections enabled, 1x7x7 stem kernel.
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=True,
            conv1_kernel=(1, 7, 7),
            dilations=(1, 1, 1, 1),
            conv1_stride_t=1,
            pool1_stride_t=1,
            inflate=(0, 0, 1, 1),
            norm_eval=False,
        ),
        # Fast pathway: no lateral connections, 8 base channels, 5x7x7 stem
        # kernel.
        fast_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=False,
            base_channels=8,
            conv1_kernel=(5, 7, 7),
            conv1_stride_t=1,
            pool1_stride_t=1,
            norm_eval=False,
        ),
    ),
    cls_head=dict(
        type=SlowFastHead,
        in_channels=2304,  # 2048+256
        num_classes=400,
        spatial_type='avg',
        dropout_ratio=0.5,
        average_clips='prob',
    ),
    data_preprocessor=dict(
        type=ActionDataPreprocessor,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        format_shape='NCTHW',
    ),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from mmengine.config import read_base

with read_base():
    from .slowfast_r50_8xb8_8x8x1_256e_kinetics400_rgb import *

# Switch both pathways to depth 101.
# NOTE: in a pure-Python config, ``model = dict(...)`` would rebind ``model``
# and drop every inherited field (recognizer type, head, preprocessor, ...).
# ``update`` merges the override into the inherited dict instead.
model.update(
    dict(
        backbone=dict(
            slow_pathway=dict(depth=101),
            fast_pathway=dict(depth=101))))
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from mmengine.config import read_base

with read_base():
    from .slowfast_r50_8xb8_4x16x1_256e_kinetics400_rgb import *

# Only the slow pathway is switched to depth 101.
# NOTE: plain assignment would replace the inherited dicts wholesale in a
# pure-Python config, so the overrides are merged with ``update``.
model.update(dict(backbone=dict(slow_pathway=dict(depth=101))))

# Override just the learning rate; the inherited optimizer type, momentum,
# weight decay and gradient clipping are kept by the merge.
optim_wrapper.update(dict(optimizer=dict(lr=0.1 * 4)))
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from mmengine.config import read_base
from mmengine.optim import LinearLR, MultiStepLR

with read_base():
    from .slowfast_r50_8xb8_8x8x1_256e_kinetics400_rgb import *

# Enable ``lateral_norm`` on the slow pathway.
# NOTE: plain assignment would replace the inherited ``model`` wholesale in a
# pure-Python config, so the override is merged with ``update``.
model.update(dict(backbone=dict(slow_pathway=dict(lateral_norm=True))))

# Replaces the inherited schedule entirely (a list is rebound, not merged):
# linear warm-up over epochs 0-34, then step decay by 0.1 at the listed
# milestones. Scheduler types are class references, matching the parent
# config's style.
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.1,
        by_epoch=True,
        begin=0,
        end=34,
        convert_to_iter_based=True),
    dict(
        type=MultiStepLR,
        begin=0,
        end=256,
        by_epoch=True,
        milestones=[94, 154, 196],
        gamma=0.1)
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from mmengine.config import read_base
from mmengine.dataset import DefaultSampler
from mmengine.optim import CosineAnnealingLR, LinearLR
from mmengine.runner import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim import SGD

from mmaction.datasets.transforms.formatting import (FormatShape,
PackActionInputs)
from mmaction.datasets.transforms.loading import (DecordDecode, DecordInit,
SampleFrames)
from mmaction.datasets.transforms.processing import (CenterCrop, Flip,
RandomResizedCrop, Resize,
ThreeCrop)
from mmaction.datasets.video_dataset import VideoDataset
from mmaction.evaluation.metrics.acc_metric import AccMetric

with read_base():
from ..._base_.default_runtime import *
from ..._base_.models.slowfast_r50 import *

# Dataset class and on-disk locations.
dataset_type = VideoDataset

# Video directories for the train and val splits.
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'

# Annotation lists; the test list is the same file as the val list.
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'

# Keyword arguments forwarded to ``DecordInit`` in every pipeline.
file_client_args = dict(io_backend='disk')
# Training pipeline: sample one 32-frame clip (frame interval 2), decode,
# resize, random-resized-crop, resize to 224x224, random flip, then format
# and pack.
train_pipeline = [
    dict(type=DecordInit, **file_client_args),
    dict(
        type=SampleFrames,
        clip_len=32,
        frame_interval=2,
        num_clips=1,
    ),
    dict(type=DecordDecode),
    dict(type=Resize, scale=(-1, 256)),
    dict(type=RandomResizedCrop),
    dict(
        type=Resize,
        scale=(224, 224),
        keep_ratio=False,
    ),
    dict(type=Flip, flip_ratio=0.5),
    dict(type=FormatShape, input_format='NCTHW'),
    dict(type=PackActionInputs),
]
# Validation pipeline: one 32-frame clip sampled in test mode, resized and
# center-cropped to 224, then formatted and packed.
val_pipeline = [
    dict(type=DecordInit, **file_client_args),
    dict(
        type=SampleFrames,
        clip_len=32,
        frame_interval=2,
        num_clips=1,
        test_mode=True,
    ),
    dict(type=DecordDecode),
    dict(type=Resize, scale=(-1, 256)),
    dict(type=CenterCrop, crop_size=224),
    dict(type=FormatShape, input_format='NCTHW'),
    dict(type=PackActionInputs),
]
# Test pipeline: ten 32-frame clips sampled in test mode, resized, then
# ``ThreeCrop`` with crop size 256, formatted and packed.
test_pipeline = [
    dict(type=DecordInit, **file_client_args),
    dict(
        type=SampleFrames,
        clip_len=32,
        frame_interval=2,
        num_clips=10,
        test_mode=True,
    ),
    dict(type=DecordDecode),
    dict(type=Resize, scale=(-1, 256)),
    dict(type=ThreeCrop, crop_size=256),
    dict(type=FormatShape, input_format='NCTHW'),
    dict(type=PackActionInputs),
]
# Training loader: batches of 8, shuffled.
train_dataloader = dict(
    batch_size=8,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=True),
    dataset=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=dict(video=data_root),
        pipeline=train_pipeline,
    ),
)

# Validation loader: batches of 8, unshuffled, dataset in test mode.
val_dataloader = dict(
    batch_size=8,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=dict(video=data_root_val),
        pipeline=val_pipeline,
        test_mode=True,
    ),
)

# Test loader: batch size 1, unshuffled, reading from the val video directory.
test_dataloader = dict(
    batch_size=1,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=dict(video=data_root_val),
        pipeline=test_pipeline,
        test_mode=True,
    ),
)

# Evaluation: the test evaluator is the same object as the val evaluator.
val_evaluator = dict(type=AccMetric)
test_evaluator = val_evaluator

# Loops: 256 training epochs, validating every 5 epochs starting at epoch 1.
train_cfg = dict(
    type=EpochBasedTrainLoop,
    max_epochs=256,
    val_begin=1,
    val_interval=5,
)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# Optimizer: SGD with momentum and weight decay, plus L2 gradient-norm
# clipping at 40.
optim_wrapper = dict(
    optimizer=dict(
        type=SGD,
        lr=0.1,
        momentum=0.9,
        weight_decay=1e-4,
    ),
    clip_grad=dict(max_norm=40, norm_type=2),
)

# Learning-rate schedule: a linear scheduler over epochs 0-34 (converted to
# iteration-based) and a cosine-annealing scheduler over epochs 0-256.
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.1,
        by_epoch=True,
        begin=0,
        end=34,
        convert_to_iter_based=True,
    ),
    dict(
        type=CosineAnnealingLR,
        T_max=256,
        eta_min=0,
        by_epoch=True,
        begin=0,
        end=256,
    ),
]

# Adjust the inherited hooks in place: checkpoint interval 4 with at most
# 3 checkpoints kept, and logger interval 100.
default_hooks.update(
    checkpoint=dict(interval=4, max_keep_ckpts=3),
    logger=dict(interval=100),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from mmengine.config import read_base

with read_base():
    from .slowfast_r50_8xb8_4x16x1_256e_kinetics400_rgb import *

# Change the backbone's pathway ratios and the slow pathway's fusion kernel.
# NOTE: in a pure-Python config, ``model = dict(...)`` would rebind ``model``
# and drop every inherited field (recognizer type, pathway definitions, head,
# preprocessor). ``update`` merges the override into the inherited dict.
model.update(
    dict(
        backbone=dict(
            resample_rate=4,  # tau
            speed_ratio=4,  # alpha
            channel_ratio=8,  # beta_inv
            slow_pathway=dict(fusion_kernel=7))))