diff --git a/src/asm/x86/cdef.rs b/src/asm/x86/cdef.rs index e9011a12c6..5a741dce95 100644 --- a/src/asm/x86/cdef.rs +++ b/src/asm/x86/cdef.rs @@ -9,8 +9,7 @@ use crate::cdef::*; use crate::cpu_features::CpuFeatureLevel; -use crate::frame::*; -use crate::tiling::PlaneRegionMut; +use crate::tiling::{PlaneRegion, PlaneRegionMut}; use crate::util::*; type CdefFilterFn = unsafe extern fn( @@ -41,8 +40,8 @@ const fn decimate_index(xdec: usize, ydec: usize) -> usize { ((ydec << 1) | xdec) & 3 } -pub(crate) unsafe fn cdef_filter_block( - dst: &mut PlaneRegionMut<'_, T>, src: *const u16, src_stride: isize, +pub(crate) fn cdef_filter_block( + dst: &mut PlaneRegionMut<'_, T>, src: &PlaneRegion<'_, u16>, pri_strength: i32, sec_strength: i32, dir: usize, damping: i32, bit_depth: usize, xdec: usize, ydec: usize, cpu: CpuFeatureLevel, ) { @@ -50,7 +49,6 @@ pub(crate) unsafe fn cdef_filter_block( rust::cdef_filter_block( dst, src, - src_stride, pri_strength, sec_strength, dir, @@ -67,40 +65,42 @@ pub(crate) unsafe fn cdef_filter_block( call_rust(&mut copy.as_region_mut()); copy }; - match T::type_enum() { - PixelType::U8 => { - match CDEF_FILTER_FNS[cpu.as_index()][decimate_index(xdec, ydec)] { - Some(func) => { - (func)( - dst.data_ptr_mut() as *mut _, - T::to_asm_stride(dst.plane_cfg.stride), - src, - src_stride, - pri_strength, - sec_strength, - dir as i32, - damping, - ); + unsafe { + match T::type_enum() { + PixelType::U8 => { + match CDEF_FILTER_FNS[cpu.as_index()][decimate_index(xdec, ydec)] { + Some(func) => { + (func)( + dst.data_ptr_mut() as *mut _, + T::to_asm_stride(dst.plane_cfg.stride), + src.data_ptr() as *const _, + T::to_asm_stride(src.plane_cfg.stride), + pri_strength, + sec_strength, + dir as i32, + damping, + ); + } + None => call_rust(dst), } - None => call_rust(dst), } - } - PixelType::U16 => { - match CDEF_FILTER_HBD_FNS[cpu.as_index()][decimate_index(xdec, ydec)] { - Some(func) => { - (func)( - dst.data_ptr_mut() as *mut _, - T::to_asm_stride(dst.plane_cfg.stride), - src, - src_stride, - pri_strength, - sec_strength, - dir as i32, - damping, - (1 << bit_depth) - 1, + PixelType::U16 => { + match CDEF_FILTER_HBD_FNS[cpu.as_index()][decimate_index(xdec, ydec)] { + Some(func) => { + (func)( + dst.data_ptr_mut() as *mut _, + T::to_asm_stride(dst.plane_cfg.stride), + src.data_ptr() as *const _, + T::to_asm_stride(src.plane_cfg.stride), + pri_strength, + sec_strength, + dir as i32, + damping, + (1 << bit_depth) - 1, ); + } + None => call_rust(dst), } - None => call_rust(dst), } } } @@ -159,7 +159,7 @@ type CdefDirFn = #[inline(always)] #[allow(clippy::let_and_return)] pub(crate) fn cdef_find_dir( - img: &PlaneSlice<'_, u16>, var: &mut u32, coeff_shift: usize, + img: &PlaneRegion<'_, u16>, var: &mut u32, coeff_shift: usize, cpu: CpuFeatureLevel, ) -> i32 { let call_rust = @@ -180,8 +180,8 @@ pub(crate) fn cdef_find_dir( // input, even when working with 8 bit input. Mostly done to limit // the amount of code being impacted. (func)( - img.as_ptr() as *const u16, - u16::to_asm_stride(img.plane.cfg.stride), + img.data_ptr() as *const u16, + u16::to_asm_stride(img.plane_cfg.stride), var as *mut u32, ) } diff --git a/src/asm/x86/lrf.rs b/src/asm/x86/lrf.rs index 7d705b2264..4b5d63d7c2 100644 --- a/src/asm/x86/lrf.rs +++ b/src/asm/x86/lrf.rs @@ -8,7 +8,7 @@ // PATENTS file, you can obtain it at www.aomedia.org/license/patent. use crate::cpu_features::CpuFeatureLevel; -use crate::frame::PlaneSlice; +use crate::tiling::PlaneRegion; use crate::lrf::*; use crate::util::Pixel; #[cfg(target_arch = "x86")] @@ -93,7 +93,7 @@ pub fn sgrproj_box_ab_r2( #[inline] pub fn sgrproj_box_f_r0( - f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneSlice, + f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, cpu: CpuFeatureLevel, ) { if cpu >= CpuFeatureLevel::AVX2 { @@ -108,7 +108,7 @@ pub fn sgrproj_box_f_r0( #[inline] pub fn sgrproj_box_f_r1( af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], y: usize, w: usize, - cdeffed: &PlaneSlice, cpu: CpuFeatureLevel, + cdeffed: &PlaneRegion<'_, T>, cpu: CpuFeatureLevel, ) { if cpu >= CpuFeatureLevel::AVX2 { return unsafe { @@ -122,7 +122,7 @@ pub fn sgrproj_box_f_r1( #[inline] pub fn sgrproj_box_f_r2( af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32], - y: usize, w: usize, cdeffed: &PlaneSlice, cpu: CpuFeatureLevel, + y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, cpu: CpuFeatureLevel, ) { if cpu >= CpuFeatureLevel::AVX2 { return unsafe { @@ -353,18 +353,18 @@ pub(crate) unsafe fn sgrproj_box_ab_r2_avx2( #[inline] #[target_feature(enable = "avx2")] unsafe fn sgrproj_box_f_r0_8_avx2( - f: &mut [u32], x: usize, y: usize, cdeffed: &PlaneSlice, + f: &mut [u32], x: usize, y: usize, cdeffed: &PlaneRegion<'_, T>, ) { _mm256_storeu_si256( f.as_mut_ptr().add(x) as *mut _, _mm256_slli_epi32( if mem::size_of::() == 1 { _mm256_cvtepu8_epi32(_mm_loadl_epi64( - cdeffed.subslice(x, y).as_ptr() as *const _ + cdeffed[y][x..].as_ptr() as *const _ )) } else { _mm256_cvtepu16_epi32(_mm_loadu_si128( - cdeffed.subslice(x, y).as_ptr() as *const _ + cdeffed[y][x..].as_ptr() as *const _ )) }, SGRPROJ_RST_BITS as i32, @@ -374,7 +374,7 @@ unsafe fn sgrproj_box_f_r0_8_avx2( #[target_feature(enable = "avx2")] pub(crate) unsafe fn sgrproj_box_f_r0_avx2( - f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneSlice, + f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, ) { for x in (0..w).step_by(8) { if x + 8 <= w { @@ -397,7 +397,7 @@ pub(crate) unsafe fn sgrproj_box_f_r0_avx2( #[target_feature(enable = "avx2")] unsafe fn sgrproj_box_f_r1_8_avx2( af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], x: usize, y: usize, - cdeffed: &PlaneSlice, + cdeffed: &PlaneRegion<'_, T>, ) { let three = _mm256_set1_epi32(3); let four = _mm256_set1_epi32(4); @@ -474,11 +474,11 @@ unsafe fn sgrproj_box_f_r1_8_avx2( a, if mem::size_of::() == 1 { _mm256_cvtepu8_epi32(_mm_loadl_epi64( - cdeffed.subslice(x, y).as_ptr() as *const _ + cdeffed[y][x..].as_ptr() as *const _ )) } else { _mm256_cvtepu16_epi32(_mm_loadu_si128( - cdeffed.subslice(x, y).as_ptr() as *const _ + cdeffed[y][x..].as_ptr() as *const _ )) }, ), @@ -497,7 +497,7 @@ unsafe fn sgrproj_box_f_r1_8_avx2( #[target_feature(enable = "avx2")] pub(crate) unsafe fn sgrproj_box_f_r1_avx2( af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], y: usize, w: usize, - cdeffed: &PlaneSlice, + cdeffed: &PlaneRegion<'_, T>, ) { for x in (0..w).step_by(8) { if x + 8 <= w { @@ -520,7 +520,7 @@ pub(crate) unsafe fn sgrproj_box_f_r1_avx2( #[target_feature(enable = "avx2")] unsafe fn sgrproj_box_f_r2_8_avx2( af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32], - x: usize, y: usize, cdeffed: &PlaneSlice, + x: usize, y: usize, cdeffed: &PlaneRegion<'_, T>, ) { let five = _mm256_set1_epi32(5); let six = _mm256_set1_epi32(6); @@ -573,11 +573,11 @@ unsafe fn sgrproj_box_f_r2_8_avx2( _mm256_add_epi32(a, ao), if mem::size_of::() == 1 { _mm256_cvtepu8_epi32(_mm_loadl_epi64( - cdeffed.subslice(x, y).as_ptr() as *const _ + cdeffed[y][x..].as_ptr() as *const _ )) } else { _mm256_cvtepu16_epi32(_mm_loadu_si128( - cdeffed.subslice(x, y).as_ptr() as *const _ + cdeffed[y][x..].as_ptr() as *const _ )) }, ), @@ -588,11 +588,11 @@ unsafe fn sgrproj_box_f_r2_8_avx2( ao, if mem::size_of::() == 1 { _mm256_cvtepu8_epi32(_mm_loadl_epi64( - cdeffed.subslice(x, y + 1).as_ptr() as *const _, + cdeffed[y+1][x..].as_ptr() as *const _, )) } else { _mm256_cvtepu16_epi32(_mm_loadu_si128( - cdeffed.subslice(x, y + 1).as_ptr() as *const _, + cdeffed[y+1][x..].as_ptr() as *const _, )) }, ), @@ -619,7 +619,7 @@ unsafe fn sgrproj_box_f_r2_8_avx2( #[target_feature(enable = "avx2")] pub(crate) unsafe fn sgrproj_box_f_r2_avx2( af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32], - y: usize, w: usize, cdeffed: &PlaneSlice, + y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, ) { for x in (0..w).step_by(8) { if x + 8 <= w { diff --git a/src/cdef.rs b/src/cdef.rs index 4bf93a2ca7..4ffef66fa9 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -74,7 +74,7 @@ pub(crate) mod rust { // that term is never computed. See Section 2, step 2, of: // http://jmvalin.ca/notes/intra_paint.pdf pub fn cdef_find_dir( - img: &PlaneSlice<'_, u16>, var: &mut u32, coeff_shift: usize, + img: &PlaneRegion<'_, u16>, var: &mut u32, coeff_shift: usize, _cpu: CpuFeatureLevel, ) -> i32 { let mut cost: [i32; 8] = [0; 8]; @@ -153,8 +153,8 @@ pub(crate) mod rust { #[cold_for_target_arch("x86_64")] #[allow(clippy::erasing_op, clippy::identity_op, clippy::neg_multiply)] - pub(crate) unsafe fn cdef_filter_block( - dst: &mut PlaneRegionMut<'_, T>, input: *const u16, istride: isize, + pub(crate) fn cdef_filter_block( + dst: &mut PlaneRegionMut<'_, T>, src: &PlaneRegion<'_, u16>, pri_strength: i32, sec_strength: i32, dir: usize, damping: i32, bit_depth: usize, xdec: usize, ydec: usize, _cpu: CpuFeatureLevel, ) { @@ -165,6 +165,7 @@ pub(crate) mod rust { let cdef_sec_taps = [[2, 1], [2, 1]]; let pri_taps = cdef_pri_taps[((pri_strength >> coeff_shift) & 1) as usize]; let sec_taps = cdef_sec_taps[((pri_strength >> coeff_shift) & 1) as usize]; + let istride = src.plane_cfg.stride as isize; let cdef_directions = [ [-1 * istride + 1, -2 * istride + 2], [0 * istride + 1, -1 * istride + 2], @@ -175,58 +176,61 @@ pub(crate) mod rust { [1 * istride + 0, 2 * istride + 0], [1 * istride + 0, 2 * istride - 1], ]; - for i in 0..ysize { - for j in 0..xsize { - let ptr_in = input.offset(i * istride + j); - let x = *ptr_in; - let mut sum = 0 as i32; - let mut max = x; - let mut min = x; - for k in 0..2usize { - let cdef_dirs = [ - cdef_directions[dir][k], + unsafe { + for i in 0..ysize { + for j in 0..xsize { + // Next to go... + let ptr_in = src.data_ptr().offset(i * istride + j); + let x = *ptr_in; + let mut sum = 0 as i32; + let mut max = x; + let mut min = x; + for k in 0..2usize { + let cdef_dirs = [ + cdef_directions[dir][k], cdef_directions[(dir + 2) & 7][k], - cdef_directions[(dir + 6) & 7][k], - ]; - let pri_tap = pri_taps[k]; - let p = - [*ptr_in.offset(cdef_dirs[0]), *ptr_in.offset(-cdef_dirs[0])]; - for p_elem in p.iter() { - sum += pri_tap - * constrain( - i32::cast_from(*p_elem) - i32::cast_from(x), - pri_strength, - damping, - ); - if *p_elem != CDEF_VERY_LARGE { - max = cmp::max(*p_elem, max); + cdef_directions[(dir + 6) & 7][k], + ]; + let pri_tap = pri_taps[k]; + let p = + [*ptr_in.offset(cdef_dirs[0]), *ptr_in.offset(-cdef_dirs[0])]; + for p_elem in p.iter() { + sum += pri_tap + * constrain( + i32::cast_from(*p_elem) - i32::cast_from(x), + pri_strength, + damping, + ); + if *p_elem != CDEF_VERY_LARGE { + max = cmp::max(*p_elem, max); + } + min = cmp::min(*p_elem, min); } - min = cmp::min(*p_elem, min); - } - let s = [ - *ptr_in.offset(cdef_dirs[1]), - *ptr_in.offset(-cdef_dirs[1]), - *ptr_in.offset(cdef_dirs[2]), - *ptr_in.offset(-cdef_dirs[2]), - ]; - let sec_tap = sec_taps[k]; - for s_elem in s.iter() { - if *s_elem != CDEF_VERY_LARGE { - max = cmp::max(*s_elem, max); - } - min = cmp::min(*s_elem, min); - sum += sec_tap - * constrain( + let s = [ + *ptr_in.offset(cdef_dirs[1]), + *ptr_in.offset(-cdef_dirs[1]), + *ptr_in.offset(cdef_dirs[2]), + *ptr_in.offset(-cdef_dirs[2]), + ]; + let sec_tap = sec_taps[k]; + for s_elem in s.iter() { + if *s_elem != CDEF_VERY_LARGE { + max = cmp::max(*s_elem, max); + } + min = cmp::min(*s_elem, min); + sum += sec_tap + * constrain( i32::cast_from(*s_elem) - i32::cast_from(x), - sec_strength, - damping, - ); + sec_strength, + damping, + ); + } } + let v = i32::cast_from(x) + ((8 + sum - (sum < 0) as i32) >> 4); + dst[i as usize][j as usize] = + T::cast_from(clamp(v, min as i32, max as i32)); } - let v = i32::cast_from(x) + ((8 + sum - (sum < 0) as i32) >> 4); - dst[i as usize][j as usize] = - T::cast_from(clamp(v, min as i32, max as i32)); } } } @@ -260,14 +264,14 @@ fn adjust_strength(strength: i32, var: i32) -> i32 { // boundaries (padding is untouched here). pub fn cdef_analyze_superblock_range( - fi: &FrameInvariants, in_frame: &Frame, blocks: &TileBlocks<'_>, + fi: &FrameInvariants, in_tile: &Tile<'_, u16>, blocks: &TileBlocks<'_>, sb_w: usize, sb_h: usize, ) -> Vec { let mut ret = Vec::::with_capacity(sb_h * sb_w); for sby in 0..sb_h { for sbx in 0..sb_w { let sbo = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby }); - ret.push(cdef_analyze_superblock(fi, in_frame, blocks, sbo)); + ret.push(cdef_analyze_superblock(fi, in_tile, blocks, sbo)); } } ret @@ -278,7 +282,7 @@ pub fn cdef_analyze_superblock_range( // boundaries (padding is untouched here). pub fn cdef_analyze_superblock( - fi: &FrameInvariants, in_frame: &Frame, blocks: &TileBlocks<'_>, + fi: &FrameInvariants, in_tile: &Tile, blocks: &TileBlocks<'_>, sbo: TileSuperBlockOffset, ) -> CdefDirections { let coeff_shift = fi.sequence.bit_depth as usize - 8; @@ -296,11 +300,14 @@ pub fn cdef_analyze_superblock( if !skip { let mut var: u32 = 0; - let in_plane = &in_frame.planes[0]; - let in_po = sbo.plane_offset(&in_plane.cfg); - let in_slice = in_plane.slice(in_po); + let in_region = + in_tile.planes[0].subregion(Area::BlockRect { + bo: block_offset.0, + width: 8, + height: 8, + }); dir.dir[bx][by] = cdef_find_dir::( - &in_slice.reslice(8 * bx as isize, 8 * by as isize), + &in_region, &mut var, coeff_shift, fi.cpu_feature_level, @@ -319,7 +326,7 @@ pub fn cdef_analyze_superblock( // blocks, the minimum working unit of the CDEF filters. pub fn cdef_block8_frame( w_8: usize, h_8: usize, pattern_tile: &Tile<'_, T>, -) -> Frame { +) -> Frame { Frame { planes: [ { @@ -338,6 +345,44 @@ pub fn cdef_block8_frame( } } +// Allocates and returns a new Frame with its own memory that is +// patterned on the decimation of the Frame backing the passed-in +// Tile. The width and height are in units of 8-pixel (undecimated) +// blocks, the minimum working unit of the CDEF filters. The contents +// of the tile, beginning at the passed in superblock offset, are +// copied into the new Frame. +pub fn cdef_tile_copy( + tile: &Tile<'_, U>, sbo: TileSuperBlockOffset, w_8: usize, h_8: usize, +) -> Frame { + let mut out = { + Frame { + planes: { + let new_plane = |pli: usize| { + let &PlaneConfig { xdec, ydec, .. } = tile.planes[pli].plane_cfg; + Plane::new(w_8 << 3 >> xdec, h_8 << 3 >> ydec, xdec, ydec, 0, 0) + }; + [new_plane(0), new_plane(1), new_plane(2)] + }, + } + }; + // Copy data into frame + for pli in 0..3 { + let PlaneOffset { x, y } = sbo.plane_offset(tile.planes[pli].plane_cfg); + let out_width = out.planes[pli].cfg.width as isize; + let out_height = out.planes[pli].cfg.height as isize; + let mut out_region = out.planes[pli].as_region_mut(); + for yi in 0..out_height { + let out_row = &mut out_region[yi as usize]; + let in_row = &tile.planes[pli][(y + yi) as usize]; + for xi in 0..out_width { + out_row[xi as usize] = + T::cast_from(u16::cast_from(in_row[(x + xi) as usize])); + } + } + } + out +} + // Allocates and returns a new Frame with its own memory that is // patterned on the decimation of the Frame backing the passed-in // Tile. The width and height are in units of 8-pixel (undecimated) @@ -478,7 +523,7 @@ pub fn cdef_padded_frame_copy(in_frame: &Frame) -> Frame { // large as the unpadded area of in // cdef_index is taken from the block context pub fn cdef_filter_superblock( - fi: &FrameInvariants, in_frame: &Frame, out: &mut TileMut<'_, U>, + fi: &FrameInvariants, input: &Tile, output: &mut TileMut<'_, U>, blocks: &TileBlocks<'_>, sbo: TileSuperBlockOffset, cdef_index: u8, cdef_dirs: &CdefDirections, ) { @@ -512,19 +557,24 @@ pub fn cdef_filter_superblock( let dir = cdef_dirs.dir[bx][by]; let var = cdef_dirs.var[bx][by]; for p in 0..planes { - let out_plane = &mut out.planes[p]; - let in_plane = &in_frame.planes[p]; - let in_po = sbo.plane_offset(&in_plane.cfg); - let xdec = in_plane.cfg.xdec; - let ydec = in_plane.cfg.ydec; - let in_stride = in_plane.cfg.stride; - let in_slice = &in_plane.slice(in_po); - let out_region = - &mut out_plane.subregion_mut(Area::BlockStartingAt { - bo: sbo.block_offset(0, 0).0, - }); + let out_plane = &mut output.planes[p]; + let in_plane = &input.planes[p]; + let xdec = in_plane.plane_cfg.xdec; + let ydec = in_plane.plane_cfg.ydec; let xsize = 8 >> xdec; let ysize = 8 >> ydec; + let in_region = + in_plane.subregion(Area::BlockRect { + bo: block_offset.0, + width: xsize, + height: ysize, + }); + let mut out_region = + out_plane.subregion_mut(Area::BlockRect { + bo: block_offset.0, + width: xsize, + height: ysize, + }); if !skip { let local_pri_strength; @@ -556,48 +606,29 @@ pub fn cdef_filter_superblock( } }; - unsafe { - let PlaneConfig { ypad, xpad, .. } = in_slice.plane.cfg; - assert!( - in_slice.rows_iter().len() + ypad - >= ((8 * by) >> ydec) + ysize + 2 - ); - assert!(in_slice.x - 2 >= -(xpad as isize)); - assert!(in_slice.y - 2 >= -(ypad as isize)); - - let mut dst = out_region.subregion_mut(Area::BlockRect { - bo: BlockOffset { x: 2 * bx, y: 2 * by }, - width: xsize, - height: ysize, - }); - let input = - in_slice[(8 * by) >> ydec][(8 * bx) >> xdec..].as_ptr(); - cdef_filter_block( - &mut dst, - input, - in_stride as isize, - local_pri_strength, - local_sec_strength, - local_dir, - local_damping, - bit_depth, - xdec, - ydec, - fi.cpu_feature_level, - ); - } + assert!( in_region.rect().x - 2 >= in_region.pad_rect().x ); + assert!( in_region.rect().y - 2 >= in_region.pad_rect().y ); + assert!( in_region.rect().x + in_region.rect().width as isize + 2 <= + in_region.pad_rect().x+in_region.pad_rect().width as isize ); + assert!( in_region.rect().y+in_region.rect().height as isize + 2 <= + in_region.pad_rect().y+in_region.pad_rect().height as isize ); + cdef_filter_block( + &mut out_region, + &in_region, + local_pri_strength, + local_sec_strength, + local_dir, + local_damping, + bit_depth, + xdec, + ydec, + fi.cpu_feature_level, + ); } else { // we need to copy input to output - let in_block = - in_slice.subslice((8 * bx) >> xdec, (8 * by) >> ydec); - let mut out_block = out_region.subregion_mut(Area::BlockRect { - bo: BlockOffset { x: 2 * bx, y: 2 * by }, - width: xsize, - height: ysize, - }); for i in 0..ysize { for j in 0..xsize { - out_block[i][j] = U::cast_from(in_block[i][j]); + out_region[i][j] = U::cast_from(in_region[i][j]); } } } @@ -688,11 +719,11 @@ pub fn cdef_filter_tile( for fbx in 0..fb_width { let sbo = TileSuperBlockOffset(SuperBlockOffset { x: fbx, y: fby }); let cdef_index = tb.get_cdef(sbo); - let cdef_dirs = cdef_analyze_superblock(fi, &cdef_frame, tb, sbo); + let cdef_dirs = cdef_analyze_superblock(fi, &cdef_frame.as_tile(), tb, sbo); cdef_filter_superblock( fi, - &cdef_frame, + &cdef_frame.as_tile(), rec, tb, sbo, diff --git a/src/deblock.rs b/src/deblock.rs index fc794824ec..c4a12e1c39 100644 --- a/src/deblock.rs +++ b/src/deblock.rs @@ -434,8 +434,8 @@ fn deblock_h_size4( // Assumes rec[0] and src[0] are set 2 taps back from the edge. // Accesses four taps, accumulates four pixels into the tally -fn sse_size4( - rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, +fn sse_size4( + rec: &PlaneRegion<'_, U>, src: &PlaneRegion<'_, T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, ) { for i in 0..4 { @@ -568,8 +568,8 @@ fn deblock_h_size6( // Assumes rec[0] and src[0] are set 3 taps back from the edge. // Accesses six taps, accumulates four pixels into the tally -fn sse_size6( - rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, +fn sse_size6( + rec: &PlaneRegion<'_, U>, src: &PlaneRegion<'_, T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, ) { let flat = 1 << (bd - 8); @@ -751,8 +751,8 @@ fn deblock_h_size8( // Assumes rec[0] and src[0] are set 4 taps back from the edge. // Accesses eight taps, accumulates six pixels into the tally -fn sse_size8( - rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, +fn sse_size8( + rec: &PlaneRegion<'_, U>, src: &PlaneRegion<'_, T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, ) { let flat = 1 << (bd - 8); @@ -953,8 +953,8 @@ fn deblock_h_size14( // Assumes rec[0] and src[0] are set 7 taps back from the edge. // Accesses fourteen taps, accumulates twelve pixels into the tally -fn sse_size14( - rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>, +fn sse_size14( + rec: &PlaneRegion<'_, U>, src: &PlaneRegion<'_, T>, tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize, ) { let flat = 1 << (bd - 8); @@ -1165,8 +1165,8 @@ fn filter_v_edge( } } -fn sse_v_edge( - blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion, +fn sse_v_edge( + blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion, src_plane: &PlaneRegion, tally: &mut [i64; MAX_LOOP_FILTER + 2], pli: usize, bd: usize, xdec: usize, ydec: usize, ) { @@ -1261,8 +1261,8 @@ fn filter_h_edge( } } -fn sse_h_edge( - blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion, +fn sse_h_edge( + blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion, src_plane: &PlaneRegion, tally: &mut [i64; MAX_LOOP_FILTER + 2], pli: usize, bd: usize, xdec: usize, ydec: usize, ) { @@ -1481,8 +1481,8 @@ pub fn deblock_plane( } // sse count of all edges in a single plane, accumulates into vertical and horizontal counts -fn sse_plane( - rec: &PlaneRegion, src: &PlaneRegion, +fn sse_plane( + rec: &PlaneRegion, src: &PlaneRegion, v_sse: &mut [i64; MAX_LOOP_FILTER + 2], h_sse: &mut [i64; MAX_LOOP_FILTER + 2], pli: usize, blocks: &TileBlocks, crop_w: usize, crop_h: usize, bd: usize, @@ -1575,8 +1575,8 @@ pub fn deblock_filter_frame( ); } -fn sse_optimize( - rec: &Tile, input: &Tile, blocks: &TileBlocks, crop_w: usize, +fn sse_optimize( + rec: &Tile, input: &Tile, blocks: &TileBlocks, crop_w: usize, crop_h: usize, bd: usize, monochrome: bool, ) -> [u8; 4] { // i64 allows us to accumulate a total of ~ 35 bits worth of pixels @@ -1643,7 +1643,7 @@ fn sse_optimize( #[hawktracer(deblock_filter_optimize)] pub fn deblock_filter_optimize( - fi: &FrameInvariants, rec: &Tile, input: &Tile, + fi: &FrameInvariants, rec: &Tile, input: &Tile, blocks: &TileBlocks, crop_w: usize, crop_h: usize, ) -> [u8; 4] { if fi.config.speed_settings.fast_deblock { diff --git a/src/lrf.rs b/src/lrf.rs index 01359c43a4..2867631613 100644 --- a/src/lrf.rs +++ b/src/lrf.rs @@ -19,10 +19,10 @@ use crate::color::ChromaSampling::Cs400; use crate::context::{MAX_PLANES, SB_SIZE}; use crate::encoder::FrameInvariants; use crate::frame::{ - AsRegion, Frame, Plane, PlaneConfig, PlaneOffset, PlaneSlice, + AsRegion, Frame, Plane, PlaneConfig, PlaneOffset, }; use crate::hawktracer::*; -use crate::tiling::{Area, PlaneRegionMut, Rect}; +use crate::tiling::{Area, PadIndex, PadRow, PlaneRegion, PlaneRegionMut, Rect}; use crate::util::{clamp, CastFromPrimitive, ILog, Pixel}; use crate::api::SGRComplexityLevel; @@ -171,11 +171,11 @@ impl RestorationFilter { pub(crate) mod rust { use crate::cpu_features::CpuFeatureLevel; - use crate::frame::PlaneSlice; use crate::lrf::{ get_integral_square, sgrproj_sum_finish, SGRPROJ_RST_BITS, SGRPROJ_SGR_BITS, }; + use crate::tiling::PlaneRegion; use crate::util::CastFromPrimitive; use crate::Pixel; @@ -204,7 +204,7 @@ pub(crate) mod rust { af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32], iimg_stride: usize, y: usize, stripe_w: usize, s: u32, bdm8: usize, _cpu: CpuFeatureLevel, - ) { + ) { sgrproj_box_ab_internal( 1, af, @@ -242,7 +242,7 @@ pub(crate) mod rust { } pub(crate) fn sgrproj_box_f_r0( - f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneSlice, + f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, _cpu: CpuFeatureLevel, ) { sgrproj_box_f_r0_internal(f, 0, y, w, cdeffed); @@ -250,16 +250,17 @@ pub(crate) mod rust { #[inline(always)] pub(crate) fn sgrproj_box_f_r0_internal( - f: &mut [u32], start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice, + f: &mut [u32], start_x: usize, y: usize, w: usize, + cdeffed: &PlaneRegion<'_, T>, ) { for x in start_x..w { - f[x] = (u32::cast_from(cdeffed.p(x, y))) << SGRPROJ_RST_BITS; + f[x] = (u32::cast_from(cdeffed[y][x])) << SGRPROJ_RST_BITS; } } pub(crate) fn sgrproj_box_f_r1( af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], y: usize, w: usize, - cdeffed: &PlaneSlice, _cpu: CpuFeatureLevel, + cdeffed: &PlaneRegion<'_, T>, _cpu: CpuFeatureLevel, ) { sgrproj_box_f_r1_internal(af, bf, f, 0, y, w, cdeffed); } @@ -267,7 +268,7 @@ pub(crate) mod rust { #[inline(always)] pub(crate) fn sgrproj_box_f_r1_internal( af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], start_x: usize, - y: usize, w: usize, cdeffed: &PlaneSlice, + y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, ) { let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS; for x in start_x..w { @@ -285,14 +286,14 @@ pub(crate) mod rust { + bf[1][x + 1] + bf[2][x + 1] + bf[1][x + 2]); - let v = a * u32::cast_from(cdeffed.p(x, y)) + b; + let v = a * u32::cast_from(cdeffed[y][x]) + b; f[x] = (v + (1 << shift >> 1)) >> shift; } } pub(crate) fn sgrproj_box_f_r2( af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32], - y: usize, w: usize, cdeffed: &PlaneSlice, _cpu: CpuFeatureLevel, + y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, _cpu: CpuFeatureLevel, ) { sgrproj_box_f_r2_internal(af, bf, f0, f1, 0, y, w, cdeffed); } @@ -300,7 +301,7 @@ pub(crate) mod rust { #[inline(always)] pub(crate) fn sgrproj_box_f_r2_internal( af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32], - start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice, + start_x: usize, y: usize, w: usize, cdeffed: &PlaneRegion<'_, T>, ) { let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS; let shifto = 4 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS; @@ -309,9 +310,9 @@ pub(crate) mod rust { let b = 5 * (bf[0][x] + bf[0][x + 2]) + 6 * (bf[0][x + 1]); let ao = 5 * (af[1][x] + af[1][x + 2]) + 6 * (af[1][x + 1]); let bo = 5 * (bf[1][x] + bf[1][x + 2]) + 6 * (bf[1][x + 1]); - let v = (a + ao) * u32::cast_from(cdeffed.p(x, y)) + b + bo; + let v = (a + ao) * u32::cast_from(cdeffed[y][x]) + b + bo; f0[x] = (v + (1 << shift >> 1)) >> shift; - let vo = ao * u32::cast_from(cdeffed.p(x, y + 1)) + bo; + let vo = ao * u32::cast_from(cdeffed[y+1][x]) + bo; f1[x] = (vo + (1 << shifto >> 1)) >> shifto; } } @@ -351,10 +352,8 @@ fn get_integral_square( struct VertPaddedIter<'a, T: Pixel> { // The two sources that can be selected when clipping - deblocked: &'a Plane, - cdeffed: &'a Plane, - // x index to choice where on the row to start - x: isize, + deblocked: &'a PlaneRegion<'a, T>, + cdeffed: &'a PlaneRegion<'a, T>, // y index that will be mutated y: isize, // The index at which to terminate. Can be larger than the slice length. @@ -372,67 +371,56 @@ struct VertPaddedIter<'a, T: Pixel> { impl<'a, 'b, T: Pixel> VertPaddedIter<'a, T> { fn new( - cdeffed: &PlaneSlice<'a, T>, deblocked: &PlaneSlice<'a, T>, + cdeffed: &'a PlaneRegion<'a, T>, deblocked: &'a PlaneRegion<'a, T>, stripe_h: usize, crop: usize, ) -> VertPaddedIter<'a, T> { - // cdeffed and deblocked must start at the same coordinates from their - // underlying planes. Since cropping is provided via a separate params, the - // height of the underlying planes do not need to match. - assert_eq!(cdeffed.x, deblocked.x); - assert_eq!(cdeffed.y, deblocked.y); - // To share integral images, always use the max box filter radius of 2 let r = 2; // The number of rows outside the stripe are needed - let rows_above = r + 2; + let rows_above:isize = r + 2; let rows_below = 2; - // Offset crop and stripe_h so they are relative to the underlying plane - // and not the plane slice. - let crop = crop as isize + deblocked.y; - let stripe_end = stripe_h as isize + deblocked.y; - // Move y up the number rows above. - // If y is negative we repeat the first row - let y = deblocked.y - rows_above as isize; + // If y is above the tile/frame, we repeat the first row + let y = - rows_above as isize; VertPaddedIter { - deblocked: deblocked.plane, - cdeffed: cdeffed.plane, - x: deblocked.x, + deblocked, + cdeffed, y, - end: (rows_above + stripe_h + rows_below) as isize + y, - stripe_begin: deblocked.y, - stripe_end, - crop, + end: (stripe_h + rows_below) as isize, + stripe_begin: 0, + stripe_end: stripe_h as isize, + crop: crop as isize, } } } impl<'a, T: Pixel> Iterator for VertPaddedIter<'a, T> { - type Item = &'a [T]; + type Item = &'a PadRow<'a, T>; #[inline(always)] fn next(&mut self) -> Option { if self.end > self.y { // clamp before deciding the source // clamp vertically to storage at top and passed-in height at bottom - let cropped_y = clamp(self.y, 0, self.crop - 1); + let cropped_y = clamp( + self.y, + - self.deblocked.rect().y, + self.crop - 1); // clamp vertically to stripe limits let ly = clamp(cropped_y, self.stripe_begin - 2, self.stripe_end + 1); // decide if we're vertically inside or outside the strip let src_plane = if ly >= self.stripe_begin && ly < self.stripe_end as isize { - self.cdeffed + &self.cdeffed } else { - self.deblocked + &self.deblocked }; - // cannot directly return self.ps.row(row) due to lifetime issue - let range = src_plane.row_range(self.x, ly); self.y += 1; - Some(&src_plane.data[range]) + Some(&src_plane[PadIndex(ly)]) } else { None } @@ -451,24 +439,25 @@ impl ExactSizeIterator for VertPaddedIter<'_, T> {} impl FusedIterator for VertPaddedIter<'_, T> {} struct HorzPaddedIter<'a, T: Pixel> { - // Active area cropping is done using the length of the slice - slice: &'a [T], + row: &'a PadRow<'a, T>, // x index of the iterator // When less than 0, repeat the first element. When greater than end, repeat // the last element index: isize, + crop: isize, // The index at which to terminate. Can be larger than the slice length. - end: usize, + end: isize, } impl<'a, T: Pixel> HorzPaddedIter<'a, T> { fn new( - slice: &'a [T], start_index: isize, width: usize, + row: &'a PadRow<'a,T>, index: isize, crop: isize, end: isize, ) -> HorzPaddedIter<'a, T> { HorzPaddedIter { - slice, - index: start_index, - end: (width as isize + start_index) as usize, + row, + index, + crop, + end, } } } @@ -480,9 +469,9 @@ impl<'a, T: Pixel> Iterator for HorzPaddedIter<'a, T> { fn next(&mut self) -> Option { if self.index < self.end as isize { // clamp to the edges of the frame - let x = clamp(self.index, 0, self.slice.len() as isize - 1) as usize; + let x = clamp(self.index, -self.row.x(), self.crop as isize - 1); self.index += 1; - Some(&self.slice[x]) + Some(&self.row[PadIndex(x)]) } else { None } @@ -501,7 +490,7 @@ impl FusedIterator for HorzPaddedIter<'_, T> {} pub fn setup_integral_image( integral_image_buffer: &mut IntegralImageBuffer, integral_image_stride: usize, crop_w: usize, crop_h: usize, stripe_w: usize, - stripe_h: usize, cdeffed: &PlaneSlice, deblocked: &PlaneSlice, + stripe_h: usize, cdeffed: &PlaneRegion<'_, T>, deblocked: &PlaneRegion<'_, T>, ) { let integral_image = &mut integral_image_buffer.integral_image; let sq_integral_image = &mut integral_image_buffer.sq_integral_image; @@ -510,32 +499,21 @@ pub fn setup_integral_image( let left_w = 4; // max radius of 2 + 2 padding let right_w = 3; // max radius of 2 + 1 padding - assert_eq!(cdeffed.x, deblocked.x); - - // Find how many unique elements to use to the left and right - let left_uniques = if cdeffed.x == 0 { 0 } else { left_w }; - let right_uniques = right_w.min(crop_w - stripe_w); - - // Find the total number of unique elements used - let row_uniques = left_uniques + stripe_w + right_uniques; - - // Negative start indices result in repeating the first element of the row - let start_index_x = if cdeffed.x == 0 { -(left_w as isize) } else { 0 }; - let mut rows_iter = VertPaddedIter::new( // Move left to encompass all the used data - &cdeffed.go_left(left_uniques), - &deblocked.go_left(left_uniques), + &cdeffed, + &deblocked, // since r2 uses every other row, we need an extra row if stripe_h is odd stripe_h + (stripe_h & 1), crop_h, ) - .map(|row: &[T]| { + .map(|row: &PadRow<'_, T>| { HorzPaddedIter::new( // Limit how many unique elements we use - &row[..row_uniques], - start_index_x, - left_w + stripe_w + right_w, + row, + -left_w, + crop_w as isize, + stripe_w as isize + right_w, ) }); @@ -597,10 +575,10 @@ pub fn setup_integral_image( } } -pub fn sgrproj_stripe_filter( +pub fn sgrproj_stripe_filter( set: u8, xqd: [i8; 2], fi: &FrameInvariants, integral_image_buffer: &IntegralImageBuffer, integral_image_stride: usize, - cdeffed: &PlaneSlice, out: &mut PlaneRegionMut, + cdeffed: &PlaneRegion, out: &mut PlaneRegionMut, ) { let &Rect { width: stripe_w, height: stripe_h, .. } = out.rect(); let bdm8 = fi.sequence.bit_depth - 8; @@ -808,8 +786,8 @@ pub fn sgrproj_stripe_filter( // Inputs are relative to the colocated slice views. pub fn sgrproj_solve( set: u8, fi: &FrameInvariants, - integral_image_buffer: &IntegralImageBuffer, input: &PlaneSlice, - cdeffed: &PlaneSlice, cdef_w: usize, cdef_h: usize, + integral_image_buffer: &IntegralImageBuffer, input: &PlaneRegion, + cdeffed: &PlaneRegion, cdef_w: usize, cdef_h: usize, ) -> (i8, i8) { let bdm8 = fi.sequence.bit_depth - 8; @@ -1519,10 +1497,14 @@ impl RestorationState { (crop_h as isize - stripe_start_y) as usize, size, stripe_size, - &cdeffed.planes[pli] - .slice(PlaneOffset { x: x as isize, y: stripe_start_y }), - &pre_cdef.planes[pli] - .slice(PlaneOffset { x: x as isize, y: stripe_start_y }), + &cdeffed.planes[pli].region(Area::StartingAt { + x: x as isize, + y: stripe_start_y + }), + &pre_cdef.planes[pli].region(Area::StartingAt { + x: x as isize, + y: stripe_start_y + }), ); sgrproj_stripe_filter( @@ -1531,13 +1513,15 @@ impl RestorationState { fi, &stripe_filter_buffer, STRIPE_IMAGE_STRIDE, - &cdeffed.planes[pli] - .slice(PlaneOffset { x: x as isize, y: stripe_start_y }), + &cdeffed.planes[pli].region(Area::StartingAt { + x: x as isize, + y: stripe_start_y + }), &mut out.planes[pli].region_mut(Area::Rect { - x: x as isize, - y: stripe_start_y, - width: size, - height: stripe_size, + x: x as isize, + y: stripe_start_y, + width: size, + height: stripe_size, }), ); } diff --git a/src/rdo.rs b/src/rdo.rs index aae7364b7d..6a7a3694a0 100644 --- a/src/rdo.rs +++ b/src/rdo.rs @@ -1884,7 +1884,7 @@ pub fn rdo_partition_decision( fn rdo_loop_plane_error( base_sbo: TileSuperBlockOffset, offset_sbo: TileSuperBlockOffset, sb_w: usize, sb_h: usize, fi: &FrameInvariants, ts: &TileStateMut<'_, T>, - blocks: &TileBlocks<'_>, test: &Frame, src: &Frame, pli: usize, + blocks: &TileBlocks<'_>, test: &Tile<'_, T>, src: &Tile<'_, T>, pli: usize, ) -> ScaledDistortion { let sb_w_blocks = if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_w; @@ -1899,9 +1899,9 @@ fn rdo_loop_plane_error( if loop_bo.0.x < blocks.cols() && loop_bo.0.y < blocks.rows() { let src_plane = &src.planes[pli]; let test_plane = &test.planes[pli]; - let PlaneConfig { xdec, ydec, .. } = src_plane.cfg; - debug_assert_eq!(xdec, test_plane.cfg.xdec); - debug_assert_eq!(ydec, test_plane.cfg.ydec); + let &PlaneConfig { xdec, ydec, .. } = src_plane.plane_cfg; + debug_assert_eq!(xdec, test_plane.plane_cfg.xdec); + debug_assert_eq!(ydec, test_plane.plane_cfg.ydec); // Unfortunately, our distortion biases are only available via // Frame-absolute addressing, so we need a block offset @@ -1915,9 +1915,9 @@ fn rdo_loop_plane_error( ); let src_region = - src_plane.region(Area::BlockStartingAt { bo: loop_bo.0 }); + src_plane.subregion(Area::BlockStartingAt { bo: loop_bo.0 }); let test_region = - test_plane.region(Area::BlockStartingAt { bo: loop_bo.0 }); + test_plane.subregion(Area::BlockStartingAt { bo: loop_bo.0 }); err += if pli == 0 { // For loop filters, We intentionally use cdef_dist even with @@ -1998,7 +1998,8 @@ pub fn rdo_loop_decision( const MAX_SB_SIZE: usize = 1 << MAX_SB_SHIFT; const MAX_LRU_SIZE: usize = MAX_SB_SIZE; - // Static allocation relies on the "minimal LRU area for all N planes" invariant. + // Static allocation relies on the "minimal LRU area for all N + // planes" invariant. let mut best_index = [-1; MAX_SB_SIZE * MAX_SB_SIZE]; let mut best_lrf = [[RestorationFilter::None; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE]; @@ -2022,7 +2023,7 @@ pub fn rdo_loop_decision( // flagging the border pixels as inactive]. LR code currently does // not need and will not use padding area. It always edge-extends // the passed in rectangle. - let mut rec_subset = { + let mut rec_subset16:Frame = { let const_rec = ts.rec.as_const(); // a padding of 8 gets us a full block of border. CDEF // only needs 2 pixels, but deblocking is happier with full @@ -2036,31 +2037,22 @@ pub fn rdo_loop_decision( planes, ) }; + let mut rec_region16 = rec_subset16.as_tile_mut(); // sub-setted region of the TileBlocks for our working frame area - let mut tileblocks_subset = cw.bc.blocks.subregion( + let mut tileblocks_region = cw.bc.blocks.subregion( base_sbo.block_offset(0, 0).0.x, base_sbo.block_offset(0, 0).0.y, sb_w << SUPERBLOCK_TO_BLOCK_SHIFT, sb_h << SUPERBLOCK_TO_BLOCK_SHIFT, ); - // why copy and not just a view? Because CDEF optimization requires - // u16 working space. This avoids adding another generic buffer - // typing parameter and expanding code to handle all the possible - // input/output combinations. In the future we may decide to prefer - // that over the additional temp buffer (after doing the work needed - // to allow CDEF opt to work on 8 bit). - let src_subset = { - cdef_padded_tile_copy( - &ts.input_tile, - base_sbo, - (pixel_w + 7) >> 3, - (pixel_h + 7) >> 3, - 0, - planes, - ) - }; + let src_region:Tile<'_, T> = ts.input_tile.subregion (Area::Rect { + x: (base_sbo.0.x << SUPERBLOCK_TO_PLANE_SHIFT) as isize, + y: (base_sbo.0.y << SUPERBLOCK_TO_PLANE_SHIFT) as isize, + width: pixel_w, + height: pixel_h, + }); if deblock_p { // Find a good deblocking filter solution for the passed in area. @@ -2068,9 +2060,9 @@ pub fn rdo_loop_decision( // better results from CDEF/LRF RDO. let deblock_levels = deblock_filter_optimize( fi, - &rec_subset.as_tile(), - &src_subset.as_tile(), - &tileblocks_subset.as_const(), + &rec_region16.as_const(), + &src_region, + &tileblocks_region.as_const(), crop_w, crop_h, ); @@ -2084,8 +2076,8 @@ pub fn rdo_loop_decision( // finally, deblock the temp frame deblock_filter_frame( &deblock_copy, - &mut rec_subset.as_tile_mut(), - &tileblocks_subset.as_const(), + &mut rec_region16, + &tileblocks_region.as_const(), crop_w, crop_h, fi.sequence.bit_depth, @@ -2094,44 +2086,62 @@ pub fn rdo_loop_decision( } } - let mut cdef_work = if fi.sequence.enable_cdef { - Some(cdef_padded_tile_copy( - &rec_subset.as_tile(), + let mut rec_subset:Frame = { + cdef_tile_copy( + &rec_region16.as_const(), TileSuperBlockOffset(SuperBlockOffset { x: 0, y: 0 }), (pixel_w + 7) >> 3, (pixel_h + 7) >> 3, - 0, - planes, + ) + }; + let rec_region = rec_subset.as_tile_mut(); + + let mut cdef_work; + let mut cdef_region = if fi.sequence.enable_cdef { + cdef_work = cdef_tile_copy ( + &rec_region16.as_const(), + TileSuperBlockOffset(SuperBlockOffset { x: 0, y: 0 }), + (pixel_w + 7) >> 3, + (pixel_h + 7) >> 3, + ); + Some(cdef_work.as_tile_mut()) + } else { + None + }; + + let mut cdef_dirs = if fi.sequence.enable_cdef { + Some(cdef_analyze_superblock_range( + fi, + &rec_region16.as_const(), + &tileblocks_region.as_const(), + sb_w, + sb_h, )) } else { None }; - let mut lrf_work = if fi.sequence.enable_restoration { - Some(cdef_block8_frame( + + let mut lrf_work; + let mut lrf_region = if fi.sequence.enable_restoration { + lrf_work = cdef_block8_frame( (pixel_w + 7) >> 3, (pixel_h + 7) >> 3, &ts.rec.as_const(), - )) + ); + Some(lrf_work.as_tile_mut()) } else { None }; // Precompute directional analysis for CDEF - let cdef_data = { - if cdef_work.is_some() { - Some(( - &rec_subset, - cdef_analyze_superblock_range( - fi, - &rec_subset, - &tileblocks_subset.as_const(), - sb_w, - sb_h, - ), - )) - } else { - None - } + let mut cdef_data = if fi.sequence.enable_cdef { + Some(( + &rec_region16, + cdef_region.as_mut().unwrap(), + cdef_dirs.as_mut().unwrap(), + )) + } else { + None }; // CDEF/LRF decision iteration @@ -2144,8 +2154,7 @@ pub fn rdo_loop_decision( let mut lrf_change = true; while cdef_change || lrf_change { // search for improved cdef indices, superblock by superblock, if cdef is enabled. - if let (Some((rec_copy, cdef_dirs)), Some(cdef_ref)) = - (&cdef_data, &mut cdef_work.as_mut()) + if let Some((rec, cdef, cdef_dirs)) = cdef_data.as_mut() { for sby in 0..sb_h { for sbx in 0..sb_w { @@ -2161,21 +2170,11 @@ pub fn rdo_loop_decision( for cdef_index in 0..(1 << fi.cdef_bits) { let mut err = ScaledDistortion::zero(); let mut rate = 0; - - let mut cdef_ref_tm = TileMut::new( - cdef_ref, - TileRect { - x: 0, - y: 0, - width: cdef_ref.planes[0].cfg.width, - height: cdef_ref.planes[0].cfg.height, - }, - ); cdef_filter_superblock( fi, - &rec_subset, - &mut cdef_ref_tm, - &tileblocks_subset.as_const(), + &rec_region16.as_const(), + cdef, + &tileblocks_region.as_const(), loop_sbo, cdef_index, &cdef_dirs[sby * sb_w + sbx], @@ -2185,23 +2184,23 @@ pub fn rdo_loop_decision( // We need the cropped-to-visible-frame area of this SB let wh = if fi.sequence.use_128x128_superblock { 128 } else { 64 }; - let PlaneConfig { xdec, ydec, .. } = cdef_ref.planes[pli].cfg; + let PlaneConfig { xdec, ydec, .. } = cdef.planes[pli].plane_cfg; let vis_width = (wh >> xdec).min( (crop_w >> xdec) - - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).x + - loop_sbo.plane_offset(&cdef.planes[pli].plane_cfg).x as usize, ); let vis_height = (wh >> ydec).min( (crop_h >> ydec) - - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).y + - loop_sbo.plane_offset(&cdef.planes[pli].plane_cfg).y as usize, ); // which LRU are we currently testing against? - if let (Some((lru_x, lru_y)), Some(lrf_ref)) = { + if let (Some((lru_x, lru_y)), Some(lrf)) = { let rp = &ts.restoration.planes[pli]; ( rp.restoration_unit_offset(base_sbo, loop_sbo, false), - &mut lrf_work, + &mut lrf_region, ) } { // We have a valid LRU, apply LRF, compute error @@ -2214,9 +2213,9 @@ pub fn rdo_loop_decision( 1, fi, ts, - &tileblocks_subset.as_const(), - cdef_ref, - &src_subset, + &tileblocks_region.as_const(), + &cdef.as_const(), + &src_region, pli, ); rate += if fi.sequence.enable_restoration { @@ -2233,8 +2232,9 @@ pub fn rdo_loop_decision( } RestorationFilter::Sgrproj { set, xqd } => { // only run on this single superblock - let loop_po = - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg); + let loop_cdef = &cdef.planes[pli]. + subregion(Area::SuperBlockStartingAt{sbo: loop_sbo.0}, + ); // todo: experiment with borrowing border pixels // rather than edge-extending. Right now this is // hard-clipping to the superblock boundary. @@ -2245,8 +2245,8 @@ pub fn rdo_loop_decision( vis_height, vis_width, vis_height, - &cdef_ref.planes[pli].slice(loop_po), - &cdef_ref.planes[pli].slice(loop_po), + loop_cdef, + loop_cdef, ); sgrproj_stripe_filter( set, @@ -2254,13 +2254,17 @@ pub fn rdo_loop_decision( fi, &ts.integral_buffer, SOLVE_IMAGE_STRIDE, - &cdef_ref.planes[pli].slice(loop_po), - &mut lrf_ref.planes[pli].region_mut(Area::Rect { - x: loop_po.x, - y: loop_po.y, - width: vis_width, - height: vis_height, - }), + loop_cdef, + // set the outer access bounds via as_region_mut + // (the entire scratch buffer), then get a + // subregion view. This allows lrf access to + // padding. + &mut lrf.planes[pli].subregion_mut( + Area::SuperBlockRect { + sbo: loop_sbo.0, + width: vis_width, + height: vis_height, + }), ); err += rdo_loop_plane_error( base_sbo, @@ -2269,9 +2273,9 @@ pub fn rdo_loop_decision( 1, fi, ts, - &tileblocks_subset.as_const(), - lrf_ref, - &src_subset, + &tileblocks_region.as_const(), + &lrf.as_const(), + &src_region, pli, ); rate += cw.count_lrf_switchable( @@ -2292,9 +2296,9 @@ pub fn rdo_loop_decision( 1, fi, ts, - &tileblocks_subset.as_const(), - cdef_ref, - &src_subset, + &tileblocks_region.as_const(), + &cdef.as_const(), + &src_region, pli, ); // no relative cost differeneces to different @@ -2314,26 +2318,16 @@ pub fn rdo_loop_decision( if best_new_index != prev_best_index { cdef_change = true; best_index[sby * sb_w + sbx] = best_new_index; - tileblocks_subset.set_cdef(loop_sbo, best_new_index as u8); + tileblocks_region.set_cdef(loop_sbo, best_new_index as u8); } - let mut cdef_ref_tm = TileMut::new( - cdef_ref, - TileRect { - x: 0, - y: 0, - width: cdef_ref.planes[0].cfg.width, - height: cdef_ref.planes[0].cfg.height, - }, - ); - // Keep cdef output up to date; we need it for restoration // both below and above (padding) cdef_filter_superblock( fi, - rec_copy, - &mut cdef_ref_tm, - &tileblocks_subset.as_const(), + &rec.as_const(), + cdef, + &tileblocks_region.as_const(), loop_sbo, best_index[sby * sb_w + sbx] as u8, &cdef_dirs[sby * sb_w + sbx], @@ -2349,14 +2343,18 @@ pub fn rdo_loop_decision( lrf_change = false; // search for improved restoration filter parameters if restoration is enabled - if let Some(lrf_ref) = &mut lrf_work.as_mut() { - let lrf_input = if cdef_work.is_some() { + if let Some(lrf_output) = &mut lrf_region.as_mut() { + let lrf_input = if let Some(( + _rec, + cdef, + _cdef_dirs)) = &cdef_data + { // When CDEF is enabled, we pull from the CDEF output - &cdef_work.as_ref().unwrap() + cdef } else { // When CDEF is disabled, we pull from the [optionally // deblocked] reconstruction - &rec_subset + &rec_region }; for pli in 0..planes { // Nominal size of LRU in pixels before clipping to visible frame @@ -2365,7 +2363,7 @@ pub fn rdo_loop_decision( let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift; // height, in sb, of an LRU in this plane let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift; - let PlaneConfig { xdec, ydec, .. } = lrf_ref.planes[pli].cfg; + let PlaneConfig { xdec, ydec, .. } = lrf_output.planes[pli].plane_cfg; for lru_y in 0..lru_h[pli] { // number of LRUs vertically for lru_x in 0..lru_w[pli] { @@ -2379,9 +2377,6 @@ pub fn rdo_loop_decision( pli, false, ) { - let src_plane = &src_subset.planes[pli]; // uncompressed input for reference - let lrf_in_plane = &lrf_input.planes[pli]; - let lrf_po = loop_sbo.plane_offset(&src_plane.cfg); let mut best_new_lrf = best_lrf[lru_y * lru_w[pli] + lru_x][pli]; let mut best_cost = best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli]; @@ -2395,9 +2390,9 @@ pub fn rdo_loop_decision( lru_sb_h, fi, ts, - &tileblocks_subset.as_const(), - lrf_input, - &src_subset, + &tileblocks_region.as_const(), + &lrf_input.as_const(), + &src_region, pli, ); let rate = cw.count_lrf_switchable( @@ -2420,12 +2415,16 @@ pub fn rdo_loop_decision( // We need the cropped-to-visible-frame computation area of this LRU let vis_width = unit_size.min( (crop_w >> xdec) - - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).x as usize, + - loop_sbo.plane_offset(&lrf_output.planes[pli].plane_cfg).x as usize, ); let vis_height = unit_size.min( (crop_h >> ydec) - - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).y as usize, + - loop_sbo.plane_offset(&lrf_output.planes[pli].plane_cfg).y as usize, ); + let src_plane = src_region.planes[pli].subregion( + Area::SuperBlockStartingAt{sbo: loop_sbo.0}); + let lrf_in_plane = lrf_input.planes[pli]. + subregion(Area::SuperBlockStartingAt{sbo: loop_sbo.0}); // todo: experiment with borrowing border pixels // rather than edge-extending. Right now this is @@ -2437,8 +2436,8 @@ pub fn rdo_loop_decision( vis_height, vis_width, vis_height, - &lrf_in_plane.slice(lrf_po), - &lrf_in_plane.slice(lrf_po), + &lrf_in_plane, + &lrf_in_plane, ); for &set in get_sgr_sets(fi.config.speed_settings.sgr_complexity) @@ -2447,8 +2446,8 @@ pub fn rdo_loop_decision( set, fi, &ts.integral_buffer, - &src_plane.slice(lrf_po), - &lrf_in_plane.slice(lrf_po), + &src_plane, + &lrf_in_plane, vis_width, vis_height, ); @@ -2461,13 +2460,18 @@ pub fn rdo_loop_decision( fi, &ts.integral_buffer, SOLVE_IMAGE_STRIDE, - &lrf_in_plane.slice(lrf_po), - &mut lrf_ref.planes[pli].region_mut(Area::Rect { - x: lrf_po.x, - y: lrf_po.y, - width: vis_width, - height: vis_height, - }), + &lrf_in_plane, + // set the outer access bounds via as_region_mut() + // (the entire scratch buffer), then get a + // subregion view. This allows lrf access to + // padding. + &mut lrf_output.planes[pli]. + subregion_mut( + Area::SuperBlockRect { + sbo: loop_sbo.0, + width: vis_width, + height: vis_height, + }), ); } let err = rdo_loop_plane_error( @@ -2477,9 +2481,9 @@ pub fn rdo_loop_decision( lru_sb_h, fi, ts, - &tileblocks_subset.as_const(), - lrf_ref, - &src_subset, + &tileblocks_region.as_const(), + &lrf_output.as_const(), + &src_region, pli, ); let rate = cw.count_lrf_switchable( diff --git a/src/tiling/plane_region.rs b/src/tiling/plane_region.rs index 44801f5eec..0dbd09de7e 100644 --- a/src/tiling/plane_region.rs +++ b/src/tiling/plane_region.rs @@ -16,6 +16,7 @@ use crate::util::*; use std::iter::FusedIterator; use std::marker::PhantomData; use std::ops::{Index, IndexMut}; +use std::rc::Rc; use std::slice; /// Rectangle of a plane region, in pixels @@ -38,6 +39,14 @@ impl Rect { height: self.height >> ydec, } } + pub fn to_area(&self) -> Area { + Area::Rect { + x: self.x, + y: self.y, + width: self.width, + height: self.height, + } + } } // Structure to describe a rectangle area in several ways @@ -65,6 +74,11 @@ pub enum Area { /// a rectangle starting at given block offset until the bottom-right corner /// of the parent BlockStartingAt { bo: BlockOffset }, + /// A well-defined rectangle with offset expressed in superblocks + SuperBlockRect { sbo: SuperBlockOffset, width: usize, height: usize }, + /// a rectangle starting at given superblock offset until the + /// bottom-right corner of the parent + SuperBlockStartingAt { sbo: SuperBlockOffset }, } impl Area { @@ -101,10 +115,106 @@ impl Area { height: (parent_height as isize - y) as usize, } } + Area::SuperBlockRect { sbo, width, height } => Rect { + x: (sbo.x >> xdec << SUPERBLOCK_TO_PLANE_SHIFT) as isize, + y: (sbo.y >> ydec << SUPERBLOCK_TO_PLANE_SHIFT) as isize, + width, + height, + }, + Area::SuperBlockStartingAt { sbo } => { + let x = (sbo.x >> xdec << SUPERBLOCK_TO_PLANE_SHIFT) as isize; + let y = (sbo.y >> ydec << SUPERBLOCK_TO_PLANE_SHIFT) as isize; + Rect { + x, + y, + width: (parent_width as isize - x) as usize, + height: (parent_height as isize - y) as usize, + } + } } } } +pub struct PadIndex(pub isize); +pub struct PadRow<'a, T: Pixel>(pub [PlaneRegion<'a, T>]); +pub struct PadRowMut<'a, T: Pixel>(pub [PlaneRegionMut<'a, T>]); + +macro_rules! pad_row_common { + // $name: PadRow or PadRowMut + ($name:ident, $parent:ident $(,$opt_mut:tt)?) => { + impl<'a, T: Pixel> $name<'a, T> { + pub fn x(&self) -> isize { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + pr.rect().x + } + } + pub fn y(&self) -> isize { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + pr.rect().y + } + } + pub fn pad_x(&self) -> isize { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + pr.pad_rect().x + } + } + pub fn pad_y(&self) -> isize { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + pr.pad_rect().y + } + } + pub fn width(&self) -> usize { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + pr.pad_rect().width + } + } + pub fn pad_width(&self) -> usize { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + pr.pad_rect().width + } + } + } + impl<'a, T: Pixel> Index for $name<'a, T> { + type Output = T; + #[inline(always)] + fn index(&self, index: PadIndex) -> &Self::Output { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + let row = self.0.len() as isize - pr.rect().y + pr.pad_rect().y - 1; + assert!(row >= pr.pad_rect().y - pr.rect().y); + assert!(row < pr.pad_rect().height as isize + pr.pad_rect().y - pr.rect().y); + assert!(index.0 >= pr.pad_rect().x - pr.rect().x); + assert!(index.0 < pr.pad_rect().width as isize + pr.pad_rect().x - pr.rect().x); + &*pr.data.offset(row * pr.plane_cfg.stride as isize + index.0) + } + } + } + impl<'a, T: Pixel> Index for $name<'a, T> { + type Output = T; + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + unsafe { + let pr: &$parent<'a, T> = &self.0[0]; + let row = self.0.len() as isize - pr.rect().y + pr.pad_rect().y - 1; + assert!(row >= pr.pad_rect().y - pr.rect().y); + assert!(row < pr.pad_rect().height as isize + pr.pad_rect().y - pr.rect().y); + assert!((index as isize) < pr.pad_rect().width as isize + pr.pad_rect().x - pr.rect().x); + &*pr.data.offset(row * pr.plane_cfg.stride as isize + index as isize) + } + } + } + } +} + +pad_row_common!(PadRow, PlaneRegion); +pad_row_common!(PadRowMut, PlaneRegionMut, mut); + /// Bounded region of a plane /// /// This allows to give access to a rectangular area of a plane without @@ -115,6 +225,7 @@ pub struct PlaneRegion<'a, T: Pixel> { pub plane_cfg: &'a PlaneConfig, // private to guarantee borrowing rules rect: Rect, + bounds: Rect, phantom: PhantomData<&'a T>, } @@ -127,6 +238,7 @@ pub struct PlaneRegionMut<'a, T: Pixel> { data: *mut T, // points to (plane_cfg.x, plane_cfg.y) pub plane_cfg: &'a PlaneConfig, rect: Rect, + bounds: Rect, phantom: PhantomData<&'a mut T>, } @@ -145,6 +257,12 @@ macro_rules! plane_region_common { data: unsafe { std::ptr::null_mut::() }, plane_cfg: cfg, rect, + bounds: Rect{ + x: -(cfg.xorigin as isize), + y: -(cfg.yorigin as isize), + width: cfg.width + cfg.xorigin + cfg.xpad, + height: cfg.height + cfg.yorigin + cfg.ypad, + }, phantom: PhantomData, } } @@ -158,14 +276,26 @@ macro_rules! plane_region_common { data: unsafe { data.$as_ptr().offset(origin) }, plane_cfg: cfg, rect, + bounds: Rect{ + x: -(cfg.xorigin as isize), + y: -(cfg.yorigin as isize), + width: cfg.width + cfg.xorigin + cfg.xpad, + height: cfg.height + cfg.yorigin + cfg.ypad, + }, phantom: PhantomData, } } + #[inline(always)] pub fn new(plane: &'a $($opt_mut)? Plane, rect: Rect) -> Self { Self::from_slice(& $($opt_mut)? plane.data, &plane.cfg, rect) } + #[inline(always)] + pub fn restrict(&mut self) { + self.bounds = self.rect; + } + #[inline(always)] pub fn data_ptr(&self) -> *const T { self.data @@ -176,6 +306,11 @@ macro_rules! plane_region_common { &self.rect } + #[inline(always)] + pub fn pad_rect(&self) -> &Rect { + &self.bounds + } + #[inline(always)] pub fn rows_iter(&self) -> RowsIter<'_, T> { RowsIter { @@ -263,6 +398,43 @@ macro_rules! plane_region_common { data, plane_cfg: &self.plane_cfg, rect: absolute_rect, + bounds: self.bounds, + phantom: PhantomData, + } + } + + #[inline(always)] + // as with subregion above, but allows re-expanding the region. + // This will _not_ allow expanding a region beyond the original + // rectangle created with new() or from_slice(). As such, it + // protects the original Tile/Frame boundaries. + pub fn superregion(&self, area: Area) -> PlaneRegion<'_, T> { + let rect = area.to_rect( + self.plane_cfg.xdec, + self.plane_cfg.ydec, + self.rect.width, + self.rect.height, + ); + assert!(self.rect.x + rect.x >= self.bounds.x); + assert!(rect.x + self.rect.x <= self.bounds.width as isize + + self.bounds.x); + assert!(self.rect.y + rect.y >= self.bounds.y); + assert!(rect.y + self.rect.y <= self.bounds.height as isize + + self.bounds.y); + let data = unsafe { + self.data.offset(rect.y * self.plane_cfg.stride as isize + rect.x) + }; + let absolute_rect = Rect { + x: self.rect.x + rect.x, + y: self.rect.y + rect.y, + width: rect.width, + height: rect.height, + }; + PlaneRegion { + data, + plane_cfg: &self.plane_cfg, + rect: absolute_rect, + bounds: self.bounds, phantom: PhantomData, } } @@ -333,7 +505,7 @@ macro_rules! plane_region_common { unsafe impl Send for $name<'_, T> {} unsafe impl Sync for $name<'_, T> {} - impl Index for $name<'_, T> { + impl<'a, T: Pixel> Index for $name<'a, T> { type Output = [T]; #[inline(always)] @@ -345,6 +517,18 @@ macro_rules! plane_region_common { } } } + + impl<'a, T: Pixel> Index for $name<'a, T> { + type Output = PadRow<'a, T>; + #[inline(always)] + fn index(&self, index: PadIndex) -> &Self::Output { + assert!(index.0 >= self.bounds.y - self.rect.y); + assert!(index.0 < self.bounds.height as isize + self.bounds.y - self.rect.y); + unsafe { + &*(slice::from_raw_parts(self, (index.0 + self.rect.y - self.bounds.y + 1) as usize) as *const [Self] as *const PadRow<'a, T>) + } + } + } } } @@ -416,6 +600,43 @@ impl<'a, T: Pixel> PlaneRegionMut<'a, T> { data, plane_cfg: self.plane_cfg, rect: absolute_rect, + bounds: self.bounds, + phantom: PhantomData, + } + } + + #[inline(always)] + // as with subregion_mut above, but allows re-expanding the region. + // This will _not_ allow expanding a region beyond the original + // rectangle created with new() or from_slice(). As such, it + // protects the original Tile/Frame boundaries. + pub fn superregion_mut(&mut self, area: Area) -> PlaneRegionMut<'_, T> { + let rect = area.to_rect( + self.plane_cfg.xdec, + self.plane_cfg.ydec, + self.rect.width, + self.rect.height, + ); + assert!(self.rect.x + rect.x >= self.bounds.x); + assert!(rect.x + self.rect.x <= self.bounds.width as isize + + self.bounds.x); + assert!(self.rect.y + rect.y >= self.bounds.y); + assert!(rect.y + self.rect.y <= self.bounds.height as isize + + self.bounds.y); + let data = unsafe { + self.data.offset(rect.y * self.plane_cfg.stride as isize + rect.x) + }; + let absolute_rect = Rect { + x: self.rect.x + rect.x, + y: self.rect.y + rect.y, + width: rect.width, + height: rect.height, + }; + PlaneRegionMut { + data, + plane_cfg: &self.plane_cfg, + rect: absolute_rect, + bounds: self.bounds, phantom: PhantomData, } } @@ -426,6 +647,7 @@ impl<'a, T: Pixel> PlaneRegionMut<'a, T> { data: self.data, plane_cfg: self.plane_cfg, rect: self.rect, + bounds: self.bounds, phantom: PhantomData, } } @@ -549,6 +771,7 @@ impl<'a, T: Pixel> Iterator for VertWindows<'a, T> { data: self.data, plane_cfg: self.plane_cfg, rect: self.output_rect, + bounds: self.output_rect, phantom: PhantomData, }; self.data = unsafe { self.data.add(self.plane_cfg.stride) }; @@ -583,6 +806,7 @@ impl<'a, T: Pixel> Iterator for HorzWindows<'a, T> { data: self.data, plane_cfg: self.plane_cfg, rect: self.output_rect, + bounds: self.output_rect, phantom: PhantomData, }; self.data = unsafe { self.data.add(1) }; diff --git a/src/tiling/tile.rs b/src/tiling/tile.rs index b621fddb2e..fbfe824b42 100644 --- a/src/tiling/tile.rs +++ b/src/tiling/tile.rs @@ -131,6 +131,47 @@ macro_rules! tile_common { ], } } + + #[inline(always)] + pub fn restrict(mut self) -> Self { + self.planes[0].restrict(); + self.planes[1].restrict(); + self.planes[2].restrict(); + self + } + + // Return a view to a subregion of the Tile + // + // The subregion must be included in (i.e. must not exceed) this Tile. + // + // It is described by an `Area`, relative to the luma plane of + // this region. + #[inline(always)] + pub fn subregion(&self, area: Area) -> Tile<'_, T> { + let tile_rect = area.to_rect( + 0, + 0, + self.planes[0].rect().width, + self.planes[0].rect().height, + ); + Tile { + planes: { + let sub_plane = |pli: usize| { + let plane = &self.planes[pli]; + let &PlaneConfig { xdec, ydec, .. } = self.planes[pli].plane_cfg; + let rect = tile_rect.decimated(xdec, ydec); + assert!(rect.x >= 0 && rect.x as usize <= plane.rect().width); + assert!(rect.y >= 0 && rect.y as usize <= plane.rect().height); + assert!(rect.x as usize + rect.width <= + plane.rect().x as usize + plane.rect().width); + assert!(rect.y as usize + rect.height <= + plane.rect().y as usize + plane.rect().height); + plane.subregion(rect.to_area()) + }; + [sub_plane(0), sub_plane(1), sub_plane(2)] + }, + } + } } } } diff --git a/src/tiling/tile_state.rs b/src/tiling/tile_state.rs index 7866ee2d4e..78d36315d7 100644 --- a/src/tiling/tile_state.rs +++ b/src/tiling/tile_state.rs @@ -160,11 +160,11 @@ impl<'a, T: Pixel> TileStateMut<'a, T> { width, height, input: &fs.input, - input_tile: Tile::new(&fs.input, luma_rect), + input_tile: Tile::new(&fs.input, luma_rect).restrict(), input_hres: &fs.input_hres, input_qres: &fs.input_qres, deblock: &fs.deblock, - rec: TileMut::new(Arc::make_mut(&mut fs.rec), luma_rect), + rec: TileMut::new(Arc::make_mut(&mut fs.rec), luma_rect).restrict(), qc: Default::default(), segmentation: &fs.segmentation, restoration: TileRestorationStateMut::new(