diff options
-rw-r--r-- | src/lib.rs | 44 |
1 files changed, 38 insertions, 6 deletions
diff --git a/src/lib.rs b/src/lib.rs index 5e2affe..87847ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,16 +112,48 @@ impl SealedKey { // Construct Aho-Corasick automaton // TODO See if this can be done at compile time let ac = AhoCorasick::builder() - .match_kind(MatchKind::LeftmostLongest) .ascii_case_insensitive(true) .build(DICTIONARY) .unwrap(); - // Stream decode the input - let indices = ac - .find_iter(&alpha) - .map(|m| m.pattern().as_usize()) - .collect::<Vec<_>>(); + // Find all overlapping matches + let mut matches = ac.find_overlapping_iter(&alpha).collect::<Vec<_>>(); + + // Decode using dynamic programming + matches.sort_by(|m, n| m.end().cmp(&n.end())); + + let mut p = Vec::with_capacity(matches.len()); + for &m in &matches { + let pv = matches.binary_search_by(|n| n.end().cmp(&m.start())); + p.push(pv.ok()); + } + + let mut opt: Vec<usize> = Vec::with_capacity(matches.len()); + for i in 0..matches.len() { + if let Some(pv) = p[i] { + opt.push( + (matches[i].len() + opt[pv]) + .max(opt.get(i.saturating_sub(1)).copied().unwrap_or(0)), + ); + } else { + opt.push(opt.get(i.saturating_sub(1)).copied().unwrap_or(0)); + } + } + + let mut indices = Vec::with_capacity(20); + let mut index = opt.len() - 1; + while indices.len() < 20 { + let pv = p[index].unwrap_or(0); + + if matches[index].len() + opt[pv] > opt[index.saturating_sub(1)] { + indices.push(matches[index].pattern().as_usize()); + index = pv; + } else { + index = index.saturating_sub(1); + } + } + + indices.reverse(); // Check length if indices.len() != 20 { |