From 5425029819212bce4c5f17f9890a3479c343aea8 Mon Sep 17 00:00:00 2001 From: Purdea Andrei Date: Sun, 1 Sep 2024 09:09:26 +0300 Subject: [PATCH] gateware.iostream.IOStreamer: let o_stream transfer if i_stream not rdy This change allows o_stream to take at least one transfer, as long as there are no sample transfers in flight. This will prevent deadlocks, for example if i_stream is implemented with something like: ```m.d.comb += ready.eq(valid)``` Which is valid according to stream rules. This commit also adds tests for this. --- software/glasgow/gateware/iostream.py | 13 ++++--- software/tests/gateware/test_iostream.py | 43 ++++++++++++++++++++---- 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/software/glasgow/gateware/iostream.py b/software/glasgow/gateware/iostream.py index 6556a63bd..615256a04 100644 --- a/software/glasgow/gateware/iostream.py +++ b/software/glasgow/gateware/iostream.py @@ -168,15 +168,18 @@ def elaborate(self, platform): m.d.comb += buffer_parts.oe.eq(latch_parts.oe) def delay(value, name): + delayed_values = [] for stage in range(latency): next_value = Signal.like(value, name=f"{name}_{stage}") m.d.sync += next_value.eq(value) value = next_value - return value + delayed_values.append(next_value) + return delayed_values - i_en = delay(self.o_stream.valid & self.o_stream.ready & - self.o_stream.p.i_en, name="i_en") - meta = delay(self.o_stream.p.meta, name="meta") + i_en_delays = delay(self.o_stream.valid & self.o_stream.ready & + self.o_stream.p.i_en, name="i_en") + i_en = i_en_delays[-1] + meta = delay(self.o_stream.p.meta, name="meta")[-1] # This skid buffer is organized as a shift register to avoid any uncertainties associated # with the use of an async read memory. On platforms that have LUTRAM, this implementation @@ -207,7 +210,7 @@ def delay(value, name): m.d.comb += self.i_stream.payload.eq(skid[skid_at]) m.d.comb += self.i_stream.valid.eq(i_en | (skid_at != 0)) - m.d.comb += self.o_stream.ready.eq(self.i_stream.ready & (skid_at == 0)) + m.d.comb += self.o_stream.ready.eq(self.i_stream.ready | ~((skid_at!=0) | Cat(*i_en_delays).any())) return m diff --git a/software/tests/gateware/test_iostream.py b/software/tests/gateware/test_iostream.py index 4fcef1ee0..2b50d982f 100644 --- a/software/tests/gateware/test_iostream.py +++ b/software/tests/gateware/test_iostream.py @@ -7,6 +7,8 @@ from glasgow.gateware.ports import PortGroup from glasgow.gateware.iostream import IOStreamer +MAX_SKIDBUFFER_SIZE = 4 + async def stream_get(ctx, stream): """ This helper coroutine takes one payload from the stream. @@ -51,7 +53,7 @@ class IOStreamTimeoutError(Exception): pass class IOStreamTestCase(unittest.TestCase): - def _subtest_sdr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None): + def _subtest_sdr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None, iready_comb_path=False): """ This is a latency-agnostic test, that verifies that the IOStreamer samples the inputs at the same time as the output signals change. @@ -80,6 +82,9 @@ def _subtest_sdr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_ timeout_clocks: The number of clock cycles we expect the testcase to complete. If None (default), then an appropriate amount of clock cycles is calculated. + + iready_comb_path: If true, specifies that istream.ready should be controlled combinatorially, and should only go high in response to istream.valid. + Stream rules allow logic that is connected to the istream to behave this way. """ ports = PortGroup() ports.clk_out = io.SimulationPort("o", 1) @@ -127,7 +132,7 @@ async def i_stream_consumer_tb(ctx): """ nonlocal i_stream_consumer_finished for i_ready_bit in i_ready_bits: - if i_ready_bit == "1": + if i_ready_bit == "1" and (not iready_comb_path or ctx.get(dut.i_stream.valid)): payload = await stream_get_maybe(ctx,dut.i_stream) if payload is not None: actually_sampled.append(payload.port.data_in.i) @@ -202,7 +207,7 @@ def test_sdr_input_sampled_correctly(self): i_en_bits = "010101010", i_ready_bits = "111111111" + ("1"*20)) - def _subtest_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None): + def _subtest_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None, iready_comb_path=False): """ This is a latency-agnostic test, that verifies that the IOStreamer samples the inputs at the same time as the output signals change. @@ -232,6 +237,9 @@ def _subtest_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_ timeout_clocks: The number of clock cycles we expect the testcase to complete. If None (default), then an appropriate amount of clock cycles is calculated. + + iready_comb_path: If true, specifies that istream.ready should be controlled combinatorially, and should only go high in response to istream.valid. + Stream rules allow logic that is connected to the istream to behave this way. """ ports = PortGroup() @@ -287,7 +295,7 @@ async def i_stream_consumer_tb(ctx): """ nonlocal i_stream_consumer_finished for i_ready_bit in i_ready_bits: - if i_ready_bit == "1": + if i_ready_bit == "1" and (not iready_comb_path or ctx.get(dut.i_stream.valid)): payload = await stream_get_maybe(ctx,dut.i_stream) if payload is not None: data = payload.port.data_in.i[0], payload.port.data_in.i[1] @@ -361,9 +369,30 @@ def test_ddr_input_sampled_correctly(self): i_en_bits = "010101010", i_ready_bits = "111111111" + ("1"*20)) - def _subtest_sdr_and_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None): - self._subtest_sdr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks) - self._subtest_ddr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks) + def _subtest_sdr_and_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None, iready_comb_path=False): + self._subtest_sdr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks, iready_comb_path) + self._subtest_ddr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks, iready_comb_path) + + def test_i_ready_low_doesnt_block_when_not_sampling_inputs(self): + """ + This testcase ensures that if i_stream is blocked, we can still perform + non-sampling updates, as long as we never try to sample. + """ + self._subtest_sdr_and_ddr_input_sampled_correctly( + o_valid_bits = "010101010", + i_en_bits = "000000000", + i_ready_bits = "") + + def test_i_ready_low_doesnt_block_when_iready_combinatorial(self): + """ + This testcase ensures that an i_stream consumer, which is following + stream rules, and is allowed to generate ready like this: + ```m.d.comb += ready.eq(valid)```, will not cause the system to lock up + """ + self._subtest_sdr_and_ddr_input_sampled_correctly( + o_valid_bits = "0111100000" + (("0" * MAX_SKIDBUFFER_SIZE) + "0") * 4, + i_en_bits = "0111100000" + (("0" * MAX_SKIDBUFFER_SIZE) + "0") * 4, + i_ready_bits = "0000000000" + (("0" * MAX_SKIDBUFFER_SIZE) + "1") * 4, iready_comb_path = True) def _get_random_bit_string(self, cnt_bits): rint = random.getrandbits(cnt_bits)