Skip to content

Commit

Permalink
tiliqua_soc: experiment with FPU plugin
Browse files Browse the repository at this point in the history
* add simple `fpu_bench` to selftest
* bench records 908998.44 samples/sec (~10x faster than no FPU)
* LUT usage 62% (was 44%)
  • Loading branch information
vk2seb committed Nov 7, 2024
1 parent b1aa28e commit 89d70bf
Show file tree
Hide file tree
Showing 10 changed files with 10,925 additions and 951 deletions.
4 changes: 2 additions & 2 deletions gateware/src/top/selftest/fw/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[target.riscv32imac-unknown-none-elf]
[target.riscv32imafc-unknown-none-elf]
rustflags = [
"-C", "link-arg=-Tmemory.x",
"-C", "link-arg=-Tlink.x",
]

[build]
target = "riscv32imac-unknown-none-elf"
target = "riscv32imafc-unknown-none-elf"
108 changes: 108 additions & 0 deletions gateware/src/top/selftest/fw/src/fpu_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Some simple audio DSP for FPU benchmarking taken from:
// https://github.com/sourcebox/mi-plaits-dsp-rs

/// Rational soft-limiting curve `x * (27 + x^2) / (27 + 9 * x^2)`.
///
/// The curve is odd-symmetric, maps `0` to `0` and evaluates to exactly `1`
/// at `x = 3` (and `-1` at `x = -3`). No clamping is applied.
#[inline]
pub fn soft_limit(x: f32) -> f32 {
    // Operand order is kept exactly as written so the rounding of every
    // intermediate product stays the same.
    let numerator = x * (27.0 + x * x);
    let denominator = 27.0 + 9.0 * x * x;
    numerator / denominator
}

/// Saturating wrapper around [`soft_limit`].
///
/// Inputs below `-3.0` return `-1.0`, inputs above `3.0` return `1.0`, and
/// every other input (NaN included, since both comparisons are false for it)
/// is forwarded to [`soft_limit`].
#[inline]
pub fn soft_clip(x: f32) -> f32 {
    match x {
        below if below < -3.0 => -1.0,
        above if above > 3.0 => 1.0,
        within => soft_limit(within),
    }
}

/// Linear ramp over a borrowed `f32`.
///
/// The ramp starts at the value read from `state` when the interpolator is
/// constructed and moves by `increment` on every step. The `Drop` impl for
/// this type stores the current `value` back into `state`.
#[derive(Debug)]
pub struct ParameterInterpolator<'a> {
/// Borrowed storage the ramp starts from and is written back to on drop.
state: &'a mut f32,
/// Current position of the ramp.
value: f32,
/// Amount added to `value` per step.
increment: f32,
}

impl<'a> ParameterInterpolator<'a> {
    /// Builds a ramp that starts at the current `*state` and moves towards
    /// `new_value` in `size` equal steps.
    ///
    /// The per-step increment is `(new_value - *state) / size`. `size` is not
    /// checked for zero.
    pub fn new(state: &'a mut f32, new_value: f32, size: usize) -> Self {
        let start = *state;
        let increment = (new_value - start) / (size as f32);
        Self {
            state,
            value: start,
            increment,
        }
    }

    /// Builds a ramp that starts at the current `*state` with a per-step
    /// increment of `(new_value - *state) * step`.
    pub fn new_with_step(state: &'a mut f32, new_value: f32, step: f32) -> Self {
        let start = *state;
        let increment = (new_value - start) * step;
        Self {
            state,
            value: start,
            increment,
        }
    }

    /// Re-targets this interpolator at a different `state`, using the same
    /// formula as [`new`](Self::new).
    ///
    /// The previously bound state is not written to by this call. Fields are
    /// assigned one by one rather than replacing `*self`, because replacing
    /// the whole value would run `Drop` on the old one.
    pub fn init(&mut self, state: &'a mut f32, new_value: f32, size: usize) {
        let start = *state;
        self.increment = (new_value - start) / (size as f32);
        self.value = start;
        self.state = state;
    }

    /// Advances the ramp by one increment and returns the new value.
    #[inline]
    // `next` is an inherent method here, not `Iterator::next`.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> f32 {
        let advanced = self.value + self.increment;
        self.value = advanced;
        advanced
    }

    /// Returns the value `t` increments past the current one, without
    /// advancing the ramp.
    #[inline]
    pub fn subsample(&self, t: f32) -> f32 {
        let offset = self.increment * t;
        self.value + offset
    }
}

/// Stores the ramp's current value back into the borrowed state when the
/// interpolator goes out of scope, so an interpolator later constructed on
/// the same state starts from where this one stopped.
impl Drop for ParameterInterpolator<'_> {
    fn drop(&mut self) {
        let final_value = self.value;
        *self.state = final_value;
    }
}

/// Gain state of the overdrive effect, carried from one processed block to
/// the next.
///
/// `Default` yields both gains at `0.0`, the same state `Overdrive::new()`
/// produces.
#[derive(Debug, Clone, Default)]
pub struct Overdrive {
    /// Gain applied to each sample before clipping, as left by the last block.
    pre_gain: f32,
    /// Gain applied to each sample after clipping, as left by the last block.
    post_gain: f32,
}

impl Overdrive {
    /// Creates an overdrive with both stored gains at zero.
    pub fn new() -> Self {
        Self {
            pre_gain: 0.0,
            post_gain: 0.0,
        }
    }

    /// Resets both stored gains to zero.
    pub fn init(&mut self) {
        self.post_gain = 0.0;
        self.pre_gain = 0.0;
    }

    /// Applies the overdrive to `in_out` in place.
    ///
    /// Target pre- and post-gains are derived from `drive`. Both gains are
    /// then ramped linearly from their stored values towards those targets
    /// across the block. Each sample is multiplied by the ramped pre-gain,
    /// passed through `soft_clip`, and multiplied by the ramped post-gain.
    /// The final ramp values are stored back into `self` when the two
    /// interpolators are dropped at the end of the call.
    #[inline]
    pub fn process(&mut self, drive: f32, in_out: &mut [f32]) {
        let block_len = in_out.len();

        // Target pre-gain: blend between a low and a high gain curve,
        // weighted by drive squared.
        let drive_sq = drive * drive;
        let gain_low = drive * 0.5;
        let gain_high = drive_sq * drive_sq * drive * 24.0;
        let target_pre = gain_low + (gain_high - gain_low) * drive_sq;

        // Target post-gain: reciprocal of the clipped blended pre-gain.
        let squashed = drive * (2.0 - drive);
        let target_post = 1.0 / soft_clip(0.33 + squashed * (target_pre - 0.33));

        let mut pre_ramp = ParameterInterpolator::new(&mut self.pre_gain, target_pre, block_len);
        let mut post_ramp =
            ParameterInterpolator::new(&mut self.post_gain, target_post, block_len);

        in_out.iter_mut().for_each(|sample| {
            let driven = pre_ramp.next() * *sample;
            *sample = soft_clip(driven) * post_ramp.next();
        });
    }
}

1 change: 1 addition & 0 deletions gateware/src/top/selftest/fw/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ tiliqua_hal::impl_encoder! {

pub mod handlers;
pub mod opts;
pub mod fpu_bench;
23 changes: 23 additions & 0 deletions gateware/src/top/selftest/fw/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,27 @@ where
.draw(d).ok();
}

/// Runs `fpu_bench::Overdrive::process` over a 512-sample buffer 16 times,
/// timing the loop with `timer`, then logs the buffer sum and the measured
/// throughput in samples per second.
fn fpu_bench(timer: &mut Timer0) {
    let mut overdrive = fpu_bench::Overdrive::new();
    let mut samples = [0.5f32; 512];
    let n_blocks: usize = 16;

    timer.enable();
    timer.set_timeout_ticks(0xFFFFFFFF);

    let ticks_before = timer.counter();

    for _ in 0..n_blocks {
        overdrive.process(1.0f32, &mut samples);
    }

    // Elapsed time is taken as (earlier reading - later reading).
    // NOTE(review): presumably the timer counts down from the timeout; confirm.
    let elapsed_ticks = ticks_before - timer.counter();
    let sysclk = pac::clock::sysclk();

    // The sum itself is meaningless; logging something derived from the
    // buffer keeps the processing loop from being optimized out.
    info!("sum {}", samples.iter().sum::<f32>());

    let total_samples = samples.len() * n_blocks;
    info!("fpu_bench: {} samples/sec", ((sysclk as f32) * (total_samples as f32)) / (elapsed_ticks as f32));
}

#[entry]
fn main() -> ! {
let peripherals = pac::Peripherals::take().unwrap();
Expand All @@ -299,6 +320,8 @@ fn main() -> ! {
// FIXME: use proper atomic bus sharing!!
let i2cdev2 = I2c0::new(unsafe { pac::I2C0::steal() } );

fpu_bench(&mut timer);

psram_memtest(&mut timer);

spiflash_memtest(&mut timer);
Expand Down
3 changes: 2 additions & 1 deletion gateware/src/top/selftest/top.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@

if __name__ == "__main__":
this_path = os.path.dirname(os.path.realpath(__file__))
top_level_cli(TiliquaSoc, path=this_path)
top_level_cli(TiliquaSoc, path=this_path,
argparse_fragment=lambda _: {"mainram_size": 0x10000})
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import spinal.lib.sim.Phase

import scala.collection.mutable.ArrayBuffer

import vexriscv.ip.fpu.{FpuCore, FpuParameter}

object GenCoreCynthion {
def main(args: Array[String]) {
val outputFile = "vexriscv_cynthion"
Expand Down Expand Up @@ -91,6 +93,12 @@ object GenCoreCynthion {
new YamlPlugin(outputFile + ".yaml"),
new MulPlugin,
new DivPlugin,
new FpuPlugin(
externalFpu = false,
p = FpuParameter(
withDouble = false
)
),
new ExternalInterruptArrayPlugin(
machineMaskCsrId = 0xbc0,
machinePendingsCsrId = 0xfc0,
Expand Down
Loading

0 comments on commit 89d70bf

Please sign in to comment.