diff --git a/README.md b/README.md index d16165e..c8db1aa 100644 --- a/README.md +++ b/README.md @@ -230,7 +230,7 @@ structures, in some cases linearly. func (q *Quamina) GetMatcherStats() map[string]float64 ``` The return value is a map with string keys to allow for the addition of metrics in -the future, should they be found useful. At the moment, the only metric known to be valuable +the future, should they be found useful. At the moment, the only metric known to be valuable is the total amount of memory, in bytes, used in the Event-matching data structure; the map key is “bytes”. diff --git a/nfa.go b/nfa.go index 2836379..c7bcaa0 100644 --- a/nfa.go +++ b/nfa.go @@ -144,7 +144,7 @@ func (nb *nfaBuffers) getFieldSet() map[*fieldMatcher]bool { return nb.fieldSet } -// nfa2Dfa does what the name says, but as of 2026/01 is not used. +// nfa2Dfa does what the name says. It relies upon epsilonClosure having been run on the start state. func nfa2Dfa(nfaStart *faState) *faState { // The start state always has a trivial epsilon closure (just itself), so we // can assign the self-only sentinel directly. 
Epsilon transitions (spinner @@ -246,7 +246,7 @@ func traverseDFA(start *faState, val []byte, transitions []*fieldMatcher) []*fie } else { utf8Byte = valueTerminator } - next := table.dStep(utf8Byte) + next := table.step(utf8Byte) if next == nil { break } diff --git a/regexp_nfa_test.go b/regexp_nfa_test.go index 08b1101..da89b21 100644 --- a/regexp_nfa_test.go +++ b/regexp_nfa_test.go @@ -227,7 +227,7 @@ func TestMakeByteDotFA(t *testing.T) { st := makeByteDotFA(dest, sharedNullPrinter) for i := 0; i < 256; i++ { b := byte(i) - got := st.dStep(b) + got := st.step(b) if isForbiddenUTF8(b) { if got != nil { t.Errorf("accepted %x", b) diff --git a/small_table.go b/small_table.go index ec7fb1e..d956fc5 100644 --- a/small_table.go +++ b/small_table.go @@ -86,21 +86,6 @@ func isForbiddenUTF8(b byte) bool { return b == 0xC0 || b == 0xC1 || b >= 0xF5 } -// dStep takes a step through an NFA in the case where it is known that the NFA in question -// is deterministic, i.e. each combination of an faState and a byte value transitions to at -// most one other byte value. -func (t *smallTable) dStep(utf8Byte byte) *faState { - for index, ceiling := range t.ceilings { - if utf8Byte < ceiling { - return t.steps[index] - } - } - if isForbiddenUTF8(utf8Byte) { - return nil - } - panic("Malformed smallTable") -} - // makeSmallTable creates a pre-loaded small table, with all bytes not otherwise specified having the defaultStep // value, and then a few other values with their indexes and values specified in the other two arguments. 
The // goal is to reduce memory churn diff --git a/small_table_test.go b/small_table_test.go index 4cab429..165bec9 100644 --- a/small_table_test.go +++ b/small_table_test.go @@ -67,7 +67,7 @@ func TestDodgeBadUTF8(t *testing.T) { if got := st.step(0xFE); got != nil { t.Errorf("step(0xFE) = %v, want nil", got) } - if got := st.dStep(0xFE); got != nil { + if got := st.step(0xFE); got != nil { t.Errorf("dStep(0xFE) = %v, want nil", got) } } diff --git a/value_matcher.go b/value_matcher.go index 19d3587..67083f2 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -117,47 +117,41 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer, bufs *closur // value to be wrapped in an faState; makeRegexpNFA and a few NFA builders // return *faState directly. After this switch, newFAState is the start // faState for the new automaton. - var newFAState *faState + var newFA *faState switch val.vType { case stringType, literalType: t, fm := makeStringFA(valBytes, nil, false) - newFAState, nextField = &faState{table: t}, fm + newFA, nextField = &faState{table: t}, fm case numberType: t, fm := makeStringFA(valBytes, nil, true) - newFAState, nextField = &faState{table: t}, fm + newFA, nextField = &faState{table: t}, fm fields.hasNumbers = true case anythingButType: - newFAState, nextField = makeMultiAnythingButFA(val.list) + newFA, nextField = makeMultiAnythingButFA(val.list) case shellStyleType: - newFAState, nextField = makeShellStyleFA(valBytes, printer) + newFA, nextField = makeShellStyleFA(valBytes, printer) fields.isNondeterministic = true case wildcardType: - newFAState, nextField = makeWildCardFA(valBytes, printer) + newFA, nextField = makeWildCardFA(valBytes, printer) fields.isNondeterministic = true case prefixType: t, fm := makePrefixFA(valBytes) - newFAState, nextField = &faState{table: t}, fm + newFA, nextField = &faState{table: t}, fm case monocaseType: - newFAState, nextField = makeMonocaseFA(valBytes, printer) + newFA, nextField = 
makeMonocaseFA(valBytes, printer) case regexpType: - newFAState, nextField = makeRegexpNFA(val.parsedRegexp, sharedNullPrinter) - if newFAState.table.isNondeterministic() { + newFA, nextField = makeRegexpNFA(val.parsedRegexp, sharedNullPrinter) + if newFA.table.isNondeterministic() { fields.isNondeterministic = true } - printer.labelTable(&newFAState.table, "RX start") + printer.labelTable(&newFA.table, "RX start") default: panic("unknown value type") } // there's already a table, thus an out-degree > 1 if fields.start != nil { - fields.start = mergeStartStates(fields.start, newFAState, printer) - - // in the case where you have just a handful of addTransitions but the memoryBudget - // is tiny, the overrun won't be caught because monitor.sample only checks - // every N calls. So this is to catch that probably-never-happens condition. - // if (bytesAllocated() - mm.baseAlloc) > mm.headroom { - + fields.start = mergeStartStates(fields.start, newFA, printer) if fields.isNondeterministic { epsilonClosureInto(fields.start, bufs) if buildMode == BuiltForSpeed { @@ -174,10 +168,10 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer, bufs *closur if fields.singletonMatch != nil { // singleton is here, we don't match, so our outdegree becomes 2, so we have // to build an automaton with two values in it. 
- singletonTable, _ := makeStringFA(fields.singletonMatch, fields.singletonTransition, false) + singletonAutomaton, _ := makeStringFA(fields.singletonMatch, fields.singletonTransition, false) // now table is ready for use, nuke singleton to signal threads to use it - fields.start = mergeStartStates(&faState{table: singletonTable}, newFAState, sharedNullPrinter) + fields.start = mergeStartStates(&faState{table: singletonAutomaton}, newFA, sharedNullPrinter) if fields.isNondeterministic { epsilonClosureInto(fields.start, bufs) if buildMode == BuiltForSpeed { @@ -189,7 +183,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer, bufs *closur fields.singletonTransition = nil } else { // empty valueMatcher, no special cases, just jam in the new FA - fields.start = newFAState + fields.start = newFA if fields.isNondeterministic { epsilonClosureInto(fields.start, bufs) if buildMode == BuiltForSpeed {