Skip to content

Commit

Permalink
Add cache for tokens to the ParseValueCache.
Browse files Browse the repository at this point in the history
May need a rename of the class since it now caches more than parse
values.
  • Loading branch information
mvanaken committed Sep 11, 2024
1 parent dea4274 commit 3ca65dc
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 10 deletions.
3 changes: 1 addition & 2 deletions core/src/main/java/io/parsingdata/metal/data/ParseState.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,8 @@ public static ParseState createFromByteStream(final ByteStream input) {
}

public ParseState addBranch(final Token token) {
return new ParseState(order.addBranch(token), cache, source, offset, token.isIterable() ? iterations.add(new ImmutablePair<>(token, ZERO)) : iterations, references);
return new ParseState(order.addBranch(token), cache.add(token), source, offset, token.isIterable() ? iterations.add(new ImmutablePair<>(token, ZERO)) : iterations, references);
}

public ParseState closeBranch(final Token token) {
if (token.isIterable() && !iterations.head.left.equals(token)) {
throw new IllegalStateException(format("Cannot close branch for iterable token %s. Current iteration state is for token %s.", token.name, iterations.head.left.name));
Expand Down
34 changes: 27 additions & 7 deletions core/src/main/java/io/parsingdata/metal/data/ParseValueCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,24 @@

public class ParseValueCache {

public static final ParseValueCache NO_CACHE = new ParseValueCache(null);
public static final ParseValueCache NO_CACHE = new ParseValueCache(null, null);

private final Map<String, ImmutableList<ParseValue>> cache;
private final Map<String, Token> tokenCache;

/**
* Start a cache that keeps track of values added to the parse graph.
* <p>
* In case no caching is desired, {@link #NO_CACHE} should be used instead.
*/
public ParseValueCache() {
this(new HashMap<>());
this(new HashMap<>(), new HashMap<>());
}

// For internal use only. It is private to avoid setting the cache to null. The NO_CACHE constant should be used instead.
private ParseValueCache(final Map<String, ImmutableList<ParseValue>> cache) {
private ParseValueCache(final Map<String, ImmutableList<ParseValue>> cache, final Map<String, Token> tokenCache) {
this.cache = cache;
this.tokenCache = tokenCache;
}

public Optional<ImmutableList<Value>> find(final String scopeName, int limit) {
Expand All @@ -59,10 +61,23 @@ public ParseValueCache add(final ParseValue value) {
return NO_CACHE;
}
final String name = shortName(value.name);
final Map<String, ImmutableList<ParseValue>> stringImmutableListHashMap = new HashMap<>(cache);
stringImmutableListHashMap.computeIfAbsent(name, pattern -> new ImmutableList<>());
stringImmutableListHashMap.computeIfPresent(name, (pattern, valueImmutableList) -> valueImmutableList.add(value));
return new ParseValueCache(stringImmutableListHashMap);
final Map<String, ImmutableList<ParseValue>> newCache = new HashMap<>(cache);
newCache.computeIfAbsent(name, pattern -> new ImmutableList<>());
newCache.computeIfPresent(name, (pattern, valueImmutableList) -> valueImmutableList.add(value));
return new ParseValueCache(newCache, tokenCache);
}

/**
 * Returns a cache in which the given token is registered under its name.
 * <p>
 * This instance is never modified. A new instance with a copied token map is
 * returned only when the mapping actually changes; a later token with the
 * same name replaces the earlier one.
 *
 * @param token the token to register, keyed by {@code token.name}
 * @return {@link #NO_CACHE} if this is {@link #NO_CACHE}; this instance if the
 *         token has an empty name or the same token instance is already
 *         registered under that name; otherwise a new cache containing the mapping
 */
public ParseValueCache add(final Token token) {
    if (this == NO_CACHE) {
        return NO_CACHE;
    }
    final String name = token.name;
    // Nothing to store for unnamed tokens, and re-registering the very same
    // instance would only produce an identical copy of the map. Returning
    // early avoids copying the whole map in that case.
    if (name.isEmpty() || tokenCache.get(name) == token) {
        return this;
    }
    final Map<String, Token> newTokenCache = new HashMap<>(tokenCache);
    newTokenCache.put(name, token);
    return new ParseValueCache(cache, newTokenCache);
}

private static String shortName(final String name) {
Expand All @@ -87,4 +102,9 @@ public boolean equals(final Object obj) {
public int hashCode() {
// Only the value cache contributes to the hash; tokenCache is not included here.
// NOTE(review): keep this in line with the fields compared in equals() — confirm if tokenCache should take part in both.
return Objects.hash(cache);
}

/**
 * Looks up a previously registered token by name.
 * <p>
 * An instance without a token map, such as {@link #NO_CACHE} which is
 * constructed with {@code null} maps, never contains tokens and therefore
 * yields an empty result instead of failing.
 *
 * @param referenceName the name the token was registered under
 * @return the token registered under that name, or an empty {@link Optional} if there is none
 */
public Optional<Token> findToken(final String referenceName) {
    // NO_CACHE carries a null token map; treat it as "nothing cached".
    if (tokenCache == null) {
        return Optional.empty();
    }
    return Optional.ofNullable(tokenCache.get(referenceName));
}
}
3 changes: 2 additions & 1 deletion core/src/main/java/io/parsingdata/metal/token/TokenRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ public TokenRef(final String name, final String referenceName, final Encoding en

@Override
protected Optional<ParseState> parseImpl(final Environment environment) {
return lookup(ImmutableList.create(environment.parseState.order), referenceName).computeResult().parse(environment);
// return lookup(ImmutableList.create(environment.parseState.order), referenceName).computeResult().parse(environment);
return environment.parseState.cache.findToken(referenceName).orElse(LOOKUP_FAILED).parse(environment);
}

private Trampoline<Token> lookup(final ImmutableList<ParseItem> items, final String referenceName) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
package io.parsingdata.metal.data;

import static io.parsingdata.metal.Shorthand.CURRENT_ITERATION;
import static io.parsingdata.metal.Shorthand.eqNum;
import static io.parsingdata.metal.Shorthand.last;
import static io.parsingdata.metal.Shorthand.nod;
import static io.parsingdata.metal.Shorthand.opt;
import static io.parsingdata.metal.Shorthand.rep;
import static io.parsingdata.metal.Shorthand.repn;
import static io.parsingdata.metal.Shorthand.seq;
import static io.parsingdata.metal.Shorthand.tie;
import static io.parsingdata.metal.Shorthand.token;
import static io.parsingdata.metal.Shorthand.when;
import static io.parsingdata.metal.data.ParseState.createFromByteStream;
import static io.parsingdata.metal.util.EnvironmentFactory.env;
import static io.parsingdata.metal.util.TokenDefinitions.any;
import static java.math.BigInteger.ZERO;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
Expand All @@ -20,12 +34,15 @@
import static io.parsingdata.metal.data.Slice.createFromBytes;
import static io.parsingdata.metal.util.EncodingFactory.enc;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Stream;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
Expand All @@ -34,6 +51,7 @@
import io.parsingdata.metal.expression.value.Value;
import io.parsingdata.metal.expression.value.ValueExpression;
import io.parsingdata.metal.token.Token;
import io.parsingdata.metal.util.InMemoryByteStream;

class ParseValueCacheTest {

Expand Down Expand Up @@ -189,4 +207,37 @@ public void cacheUsageTest(final String testName, final ValueExpression expressi
// That means, if result is not empty, the cache was used.
assertEquals(shouldUseCache, !eval.isEmpty());
}

// Note: according to the original author, this timeout does not abort the test once the limit is reached.
// The test will run until it finishes and then validate the duration.
// NOTE(review): the limit below is 50 (seconds, if JUnit's default unit applies — confirm), not 1 second as
// previously stated. It is also larger than the ~17 s uncached runtime quoted below, so it may not flag a
// regression; confirm the intended value.
@Timeout(value = 50)
@Test
void performanceTest() {
// This test would take way too much time without tokenref caching (~17sec).
// Using tokenref caching, it finishes in less than 100 ms.
final int dataSize = 1_000_000;
// The input length (dataSize + 5) equals the number of values asserted at the end; every def(...) below has size 1.
final byte[] input = new byte[dataSize + 2 + 3];
// This token contains recursive tokens to create large ParseGraphs.
final Token deep =
seq(
seq("tokenref",
def("data1", 1),
def("data2", 1)
),
rep("token",
seq("seq",
seq(
def("byte", 1),
nod(0)
),
when(token("tokenref"), eqNum(CURRENT_ITERATION, con(dataSize)))
)
)
);
final Optional<ParseState> result = deep.parse(env(createFromByteStream(new InMemoryByteStream(input))));
assertTrue(result.isPresent());

// Collect every value in the resulting graph (the predicate accepts all) and check the total count.
ImmutableList<ParseValue> allValues = Selection.getAllValues(result.get().order, x -> true);
assertThat(allValues.size, equalTo(dataSize + 5L));
}
}

0 comments on commit 3ca65dc

Please sign in to comment.