Skip to content

Commit

Permalink
gateware.iostream.IOStreamer: let o_stream transfer if i_stream not rdy
Browse files Browse the repository at this point in the history
This change allows o_stream to take at least one transfer, as long as
there are no sample transfers in flight. This will prevent deadlocks,
for example if i_stream is implemented with something like:
```m.d.comb += ready.eq(valid)```
Which is valid according to stream rules.

This commit also adds tests for this.
  • Loading branch information
purdeaandrei committed Sep 13, 2024
1 parent a4d7cd7 commit 5425029
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 12 deletions.
13 changes: 8 additions & 5 deletions software/glasgow/gateware/iostream.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,18 @@ def elaborate(self, platform):
m.d.comb += buffer_parts.oe.eq(latch_parts.oe)

def delay(value, name):
delayed_values = []
for stage in range(latency):
next_value = Signal.like(value, name=f"{name}_{stage}")
m.d.sync += next_value.eq(value)
value = next_value
return value
delayed_values.append(next_value)
return delayed_values

i_en = delay(self.o_stream.valid & self.o_stream.ready &
self.o_stream.p.i_en, name="i_en")
meta = delay(self.o_stream.p.meta, name="meta")
i_en_delays = delay(self.o_stream.valid & self.o_stream.ready &
self.o_stream.p.i_en, name="i_en")
i_en = i_en_delays[-1]
meta = delay(self.o_stream.p.meta, name="meta")[-1]

# This skid buffer is organized as a shift register to avoid any uncertainties associated
# with the use of an async read memory. On platforms that have LUTRAM, this implementation
Expand Down Expand Up @@ -207,7 +210,7 @@ def delay(value, name):

m.d.comb += self.i_stream.payload.eq(skid[skid_at])
m.d.comb += self.i_stream.valid.eq(i_en | (skid_at != 0))
m.d.comb += self.o_stream.ready.eq(self.i_stream.ready & (skid_at == 0))
m.d.comb += self.o_stream.ready.eq(self.i_stream.ready | ~((skid_at!=0) | Cat(*i_en_delays).any()))

return m

Expand Down
43 changes: 36 additions & 7 deletions software/tests/gateware/test_iostream.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from glasgow.gateware.ports import PortGroup
from glasgow.gateware.iostream import IOStreamer

MAX_SKIDBUFFER_SIZE = 4

async def stream_get(ctx, stream):
"""
This helper coroutine takes one payload from the stream.
Expand Down Expand Up @@ -51,7 +53,7 @@ class IOStreamTimeoutError(Exception):
pass

class IOStreamTestCase(unittest.TestCase):
def _subtest_sdr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None):
def _subtest_sdr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None, iready_comb_path=False):
"""
This is a latency-agnostic test, that verifies that the IOStreamer samples the inputs at the same time as the output signals change.
Expand Down Expand Up @@ -80,6 +82,9 @@ def _subtest_sdr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_
timeout_clocks: The number of clock cycles we expect the testcase to complete. If None (default), then an appropriate amount of clock cycles is
calculated.
iready_comb_path: If true, specifies that istream.ready should be controlled combinatorially, and should only go high in response to istream.valid.
Stream rules allow logic that is connected to the istream to behave this way.
"""
ports = PortGroup()
ports.clk_out = io.SimulationPort("o", 1)
Expand Down Expand Up @@ -127,7 +132,7 @@ async def i_stream_consumer_tb(ctx):
"""
nonlocal i_stream_consumer_finished
for i_ready_bit in i_ready_bits:
if i_ready_bit == "1":
if i_ready_bit == "1" and (not iready_comb_path or ctx.get(dut.i_stream.valid)):
payload = await stream_get_maybe(ctx,dut.i_stream)
if payload is not None:
actually_sampled.append(payload.port.data_in.i)
Expand Down Expand Up @@ -202,7 +207,7 @@ def test_sdr_input_sampled_correctly(self):
i_en_bits = "010101010",
i_ready_bits = "111111111" + ("1"*20))

def _subtest_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None):
def _subtest_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None, iready_comb_path=False):
"""
This is a latency-agnostic test, that verifies that the IOStreamer samples the inputs at the same time as the output signals change.
Expand Down Expand Up @@ -232,6 +237,9 @@ def _subtest_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_
timeout_clocks: The number of clock cycles we expect the testcase to complete. If None (default), then an appropriate amount of clock cycles is
calculated.
iready_comb_path: If true, specifies that istream.ready should be controlled combinatorially, and should only go high in response to istream.valid.
Stream rules allow logic that is connected to the istream to behave this way.
"""

ports = PortGroup()
Expand Down Expand Up @@ -287,7 +295,7 @@ async def i_stream_consumer_tb(ctx):
"""
nonlocal i_stream_consumer_finished
for i_ready_bit in i_ready_bits:
if i_ready_bit == "1":
if i_ready_bit == "1" and (not iready_comb_path or ctx.get(dut.i_stream.valid)):
payload = await stream_get_maybe(ctx,dut.i_stream)
if payload is not None:
data = payload.port.data_in.i[0], payload.port.data_in.i[1]
Expand Down Expand Up @@ -361,9 +369,30 @@ def test_ddr_input_sampled_correctly(self):
i_en_bits = "010101010",
i_ready_bits = "111111111" + ("1"*20))

def _subtest_sdr_and_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None):
self._subtest_sdr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks)
self._subtest_ddr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks)
def _subtest_sdr_and_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks=None, iready_comb_path=False):
self._subtest_sdr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks, iready_comb_path)
self._subtest_ddr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks, iready_comb_path)

def test_i_ready_low_doesnt_block_when_not_sampling_inputs(self):
"""
This testcase ensures that if i_stream is blocked, we can still perform
non-sampling updates, as long as we never try to sample.
"""
self._subtest_sdr_and_ddr_input_sampled_correctly(
o_valid_bits = "010101010",
i_en_bits = "000000000",
i_ready_bits = "")

def test_i_ready_low_doesnt_block_when_iready_combinatorial(self):
"""
This testcase ensures that an i_stream consumer, which is following
stream rules, and is allowed to generate ready like this:
```m.d.comb += ready.eq(valid)```, will not cause the system to lock up
"""
self._subtest_sdr_and_ddr_input_sampled_correctly(
o_valid_bits = "0111100000" + (("0" * MAX_SKIDBUFFER_SIZE) + "0") * 4,
i_en_bits = "0111100000" + (("0" * MAX_SKIDBUFFER_SIZE) + "0") * 4,
i_ready_bits = "0000000000" + (("0" * MAX_SKIDBUFFER_SIZE) + "1") * 4, iready_comb_path = True)

def _get_random_bit_string(self, cnt_bits):
rint = random.getrandbits(cnt_bits)
Expand Down

0 comments on commit 5425029

Please sign in to comment.