diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 9e62630d17db..e049644f6159 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -81,6 +81,9 @@ Bug Fixes
   the total count for a dimension. Prior to this fix, multi-value docs could contribute a > 1
   count to the dimension count. (Greg Miller)
 
+* LUCENE-9963: Fixed bug where FlattenGraphFilter drops tokens or crashes from incorrect
+  gap recovery. (Geoffrey Lawson)
+
 Other
 ---------------------
 
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java
index 01e1f6f7dfc1..13a4085b1fc9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java
@@ -19,6 +19,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.lucene.analysis.TokenFilter;
@@ -66,9 +67,13 @@ private final static class InputNode implements RollingBuffer.Resettable {
      *  to know when we can freeze. */
     int maxToNode = -1;
 
-    /** Where we currently map to; this changes (can only
-     *  increase as we see more input tokens), until we are finished
-     *  with this position. */
+    /** Minimum "to" input node for all tokens leaving here; we use this to check if holes exist. */
+    int minToNode = Integer.MAX_VALUE;
+
+    /**
+     * Where we currently map to; this changes (can only increase as we see more input tokens),
+     * until we are finished with this position.
+     */
     int outputNode = -1;
 
     /** Which token (index into {@link #tokens}) we will next output. */
@@ -80,6 +85,7 @@ public void reset() {
       node = -1;
       outputNode = -1;
       maxToNode = -1;
+      minToNode = Integer.MAX_VALUE;
       nextOut = 0;
     }
   }
@@ -188,14 +194,21 @@ private boolean releaseBufferedToken() {
         }
         if (inputNode.tokens.size() == 0) {
           assert inputNode.nextOut == 0;
-          assert output.nextOut == 0;
           // Hole dest nodes should never be merged since 1) we always
           // assign them to a new output position, and 2) since they never
-          // have arriving tokens they cannot be pushed:
-          assert output.inputNodes.size() == 1: output.inputNodes.size();
-          outputFrom++;
-          inputNodes.freeBefore(output.inputNodes.get(0));
-          outputNodes.freeBefore(outputFrom);
+          // have arriving tokens they cannot be pushed. Skip them, but don't free
+          // input until all are checked.
+          // Related tests: testAltPathLastStepLongHole, testAltPathLastStepHoleFollowedByHole,
+          // testAltPathLastStepHoleWithoutEndToken
+          if (output.inputNodes.size() > 1) {
+            output.nextOut++;
+            if (output.nextOut < output.inputNodes.size()) {
+              continue;
+            }
+          }
+          // Don't free from a hole src: since no edge leaves here, bookkeeping may be incorrect,
+          // and later output nodes may point to earlier input nodes. So we don't want to free them yet.
+          freeBefore(output);
           continue;
         }
 
@@ -234,9 +247,7 @@ private boolean releaseBufferedToken() {
         if (inputNode.nextOut == inputNode.tokens.size()) {
           output.nextOut++;
           if (output.nextOut == output.inputNodes.size()) {
-            outputFrom++;
-            inputNodes.freeBefore(output.inputNodes.get(0));
-            outputNodes.freeBefore(outputFrom);
+            freeBefore(output);
           }
         }
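Both freeing call sites above now funnel into the freeBefore(OutputNode) helper added in the next hunk. A minimal, self-contained sketch of why that helper frees at the minimum input node rather than at inputNodes.get(0) — the node numbers here are hypothetical, and only java.util is used:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class FreeBeforeSketch {
      public static void main(String[] args) {
        // With shingles, one output node can map to input nodes that arrived out of
        // order, e.g. a second shingled token stacked onto the earlier input node 3:
        List<Integer> inputNodesForOutput = Arrays.asList(7, 3);
        int naive = inputNodesForOutput.get(0);          // 7: would reclaim nodes 3..6 while still referenced
        int safe = Collections.min(inputNodesForOutput); // 3: holds them until fully passed
        System.out.println("free before " + safe + ", not " + naive);
      }
    }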
@@ -250,6 +261,30 @@ private boolean releaseBufferedToken() {
     return false;
   }
 
+  /**
+   * Free input nodes before the minimum input node for the given output.
+   *
+   * @param output target output node
+   */
+  private void freeBefore(OutputNode output) {
+    /* We've released all of the tokens that end at the current output, so free all output nodes before this.
+    Input nodes are more complex: the second of two shingled tokens with alternate paths can appear later in the
+    output graph than some of its alternate-path tokens. Because of this we can only free from the minimum,
+    since the minimum node will have come from before the second shingled token.
+    This means we have to hold onto input nodes whose tokens get stacked on previous nodes until
+    we've completely passed those inputs.
+    Related tests: testShingledGap, testShingledGapWithHoles
+    */
+    outputFrom++;
+    int freeBefore = Collections.min(output.inputNodes);
+    // This will catch a node being freed early if it is input to the next output.
+    // Could a freed-early node be input to a later output?
+    assert outputNodes.get(outputFrom).inputNodes.stream().filter(n -> freeBefore > n).count() == 0
+        : "freeBefore " + freeBefore + " will free in-use nodes";
+    inputNodes.freeBefore(freeBefore);
+    outputNodes.freeBefore(outputFrom);
+  }
+
   @Override
   public boolean incrementToken() throws IOException {
     //System.out.println("\nF.increment inputFrom=" + inputFrom + " outputFrom=" + outputFrom);
@@ -267,7 +302,8 @@ public boolean incrementToken() throws IOException {
 
     if (input.incrementToken()) {
       // Input node this token leaves from:
-      inputFrom += posIncAtt.getPositionIncrement();
+      int positionIncrement = posIncAtt.getPositionIncrement();
+      inputFrom += positionIncrement;
 
       int startOffset = offsetAtt.startOffset();
       int endOffset = offsetAtt.endOffset();
@@ -278,27 +314,44 @@ public boolean incrementToken() throws IOException {
       InputNode src = inputNodes.get(inputFrom);
       if (src.node == -1) {
-        // This means the "from" node of this token was never seen as a "to" node,
-        // which should only happen if we just crossed a hole.  This is a challenging
-        // case for us because we normally rely on the full dependencies expressed
-        // by the arcs to assign outgoing node IDs.  It would be better if tokens
-        // were never dropped but instead just marked deleted with a new
-        // TermDeletedAttribute (boolean valued) ... but until that future, we have
-        // a hack here to forcefully jump the output node ID:
-        assert src.outputNode == -1;
-        src.node = inputFrom;
-
-        src.outputNode = outputNodes.getMaxPos() + 1;
-        //System.out.println("  hole: force to outputNode=" + src.outputNode);
-        OutputNode outSrc = outputNodes.get(src.outputNode);
+        recoverFromHole(src, startOffset, positionIncrement);
 
-        // Not assigned yet:
-        assert outSrc.node == -1;
-        outSrc.node = src.outputNode;
-        outSrc.inputNodes.add(inputFrom);
-        outSrc.startOffset = startOffset;
       } else {
         OutputNode outSrc = outputNodes.get(src.outputNode);
+        /* If positionIncrement > 1 and the position we're incrementing from doesn't come to the current node, we've crossed a hole.
+         * The long edge will point too far back and not account for the holes unless it gets fixed.
+         * example:
+         *  _____abc______
+         * |             |
+         * |             V
+         * O-a->O- ->O- ->O-d->O
+         *
+         * A long edge may have already made this fix, though; if src is more than one position ahead in the output there's no additional work to do.
+         * example:
+         *  _____abc______
+         * |  ....bc....|
+         * | .         VV
+         * O-a->O- ->O- ->O-d->O
+         */
+        if (positionIncrement > 1
+            && src.outputNode - inputNodes.get(inputFrom - positionIncrement).outputNode <= 1
+            && inputNodes.get(inputFrom - positionIncrement).minToNode != inputFrom) {
+          /* If there was a hole at the end of an alternate path then the input and output nodes
+           * have been created, but the offsets and increments have not been maintained correctly.
+           * Here we go back and fix them.
+           * Related test: testAltPathLastStepHole
+           * The last node in the alt path didn't arrive to remove this reference.
+           */
+          assert inputNodes.get(inputFrom).tokens.isEmpty() : "about to remove non-empty edge";
+          outSrc.inputNodes.remove(Integer.valueOf(inputFrom));
+          src.outputNode = -1;
+          int prevEndOffset = outSrc.endOffset;
+
+          outSrc = recoverFromHole(src, startOffset, positionIncrement);
+          outSrc.endOffset = prevEndOffset;
+        }
+
         if (outSrc.startOffset == -1 || startOffset > outSrc.startOffset) {
           // "shrink wrap" the offsets so the original tokens (with most
           // restrictive offsets) win:
@@ -309,6 +362,7 @@ public boolean incrementToken() throws IOException {
       // Buffer this token:
       src.tokens.add(captureState());
       src.maxToNode = Math.max(src.maxToNode, inputTo);
+      src.minToNode = Math.min(src.minToNode, inputTo);
       maxLookaheadUsed = Math.max(maxLookaheadUsed, inputNodes.getBufferSize());
 
       InputNode dest = inputNodes.get(inputTo);
@@ -353,6 +407,55 @@ public boolean incrementToken() throws IOException {
     }
   }
 
+  private OutputNode recoverFromHole(InputNode src, int startOffset, int posinc) {
+    // This means the "from" node of this token was never seen as a "to" node,
+    // which should only happen if we just crossed a hole. This is a challenging
+    // case for us because we normally rely on the full dependencies expressed
+    // by the arcs to assign outgoing node IDs. It would be better if tokens
+    // were never dropped but instead just marked deleted with a new
+    // TermDeletedAttribute (boolean valued) ... but until that future, we have
+    // a hack here to forcefully jump the output node ID:
+    assert src.outputNode == -1;
+    src.node = inputFrom;
+
+    int outIndex;
+    int previousInputFrom = inputFrom - posinc;
+    if (previousInputFrom >= 0) {
+      InputNode offsetSrc = inputNodes.get(previousInputFrom);
+      /* Select the output src node. We need to make sure the new output node isn't placed too far ahead:
+       * if a disconnected node is placed at the end of the output graph, it may land after output nodes that map to input nodes that come after src in the input.
+       * Since it is disconnected there is no path to it, and there could be holes after it, meaning no paths to following nodes. This "floating" edge will cause problems in freeBefore.
+       * In the following section, make sure the edge connects to something.
+       * Related tests: testLongHole, testAltPathLastStepHoleFollowedByHole, testAltPathFirstStepHole, testShingledGapWithHoles
+       */
+      if (offsetSrc.minToNode < inputFrom) {
+        // There is a possible path to this node.
+        // Place this node one position off from the possible path, keeping a 1-inc gap.
+        // Can't be larger than 1 inc or we risk getting disconnected.
+        outIndex = inputNodes.get(offsetSrc.minToNode).outputNode + 1;
+      } else {
+        // No information about how the current node was previously connected.
+        // Connect it to the end.
+        outIndex = outputNodes.getMaxPos();
+      }
+    } else {
+      // In case the first token in the stream is a hole we have no input node to increment from.
+      outIndex = outputNodes.getMaxPos() + 1;
+    }
+    OutputNode outSrc = outputNodes.get(outIndex);
+    src.outputNode = outIndex;
+
+    // outSrc may have other inputs
+    if (outSrc.node == -1) {
+      outSrc.node = src.outputNode;
+      outSrc.startOffset = startOffset;
+    } else {
+      outSrc.startOffset = Math.max(startOffset, outSrc.startOffset);
+    }
+    outSrc.inputNodes.add(inputFrom);
+    return outSrc;
+  }
+
   // Only for debugging:
   /*
   private void printStates() {
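A compact way to reproduce the failure this patch fixes, ahead of the test changes below: push a graph whose alternate path ends in a hole through the filter and drain it. This is a sketch, not part of the patch; the tok(...) helper mirrors the private token(...) helper used in TestFlattenGraphFilter, and the token values come from testAltPathLastStepHoleWithoutEndToken. Before this fix the stream dropped "b" (or, with asserts enabled, threw an AssertionError):

    import org.apache.lucene.analysis.CannedTokenStream;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.FlattenGraphFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public class FlattenGraphHoleDemo {
      static Token tok(String term, int posInc, int posLen, int start, int end) {
        Token t = new Token(term, start, end);
        t.setPositionIncrement(posInc);
        t.setPositionLength(posLen);
        return t;
      }

      public static void main(String[] args) throws Exception {
        // "abc" spans three positions; its alt path "a" -> "b" ends in a hole and the
        // stream stops, so no later edge arrives to fix the long edge's output node.
        TokenStream in = new CannedTokenStream(0, 2,
            tok("abc", 1, 3, 0, 3), tok("a", 0, 1, 0, 1), tok("b", 1, 1, 1, 2));
        TokenStream out = new FlattenGraphFilter(in);
        CharTermAttribute term = out.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute inc = out.addAttribute(PositionIncrementAttribute.class);
        out.reset();
        while (out.incrementToken()) {
          System.out.println(term + " posInc=" + inc.getPositionIncrement());
        }
        out.end();
        out.close();
      }
    }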
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
index c69bcca9cf89..f86c3b42f327 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
@@ -17,13 +17,33 @@
 
 package org.apache.lucene.analysis.core;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AutomatonToTokenStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.TokenStreamToAutomaton;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.Transition;
 
 public class TestFlattenGraphFilter extends BaseTokenStreamTestCase {
 
@@ -195,7 +215,6 @@ public void testSimpleHole() throws Exception {
 
     TokenStream out = new FlattenGraphFilter(in);
 
-    // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
     assertTokenStreamContents(out,
                               new String[] {"hello", "hole", "fun"},
                               new int[] {0, 6, 11},
@@ -277,8 +296,689 @@ public void testTwoLongParallelPaths() throws Exception {
         new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
         11);
   }
 
+  // b has a posInc of 1, which is correct, but no edge ever visited that node.
+  // After hole recovery 'b' and 'c' should still be under 'abc'.
+  // asserts disabled = pos length of abc = 4
+  // asserts enabled = AssertionError: outputEndNode=3 vs inputTo=2
+  public void testAltPathFirstStepHole() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            3,
+            new Token[] {token("abc", 1, 3, 0, 3), token("b", 1, 1, 1, 2), token("c", 1, 1, 2, 3)});
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "b", "c"},
+        new int[] {0, 1, 2},
+        new int[] {3, 2, 3},
+        new int[] {1, 1, 1},
+        new int[] {3, 1, 1},
+        3);
+  }
+
+  // The last node in an alt path fixes the output node of the long path; in this graph the
+  // follow-up node fixes that.
+  // incorrect pos length of abc = 1
+  public void testAltPathLastStepHole() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            4,
+            new Token[] {
+              token("abc", 1, 3, 0, 3),
+              token("a", 0, 1, 0, 1),
+              token("b", 1, 1, 1, 2),
+              token("d", 2, 1, 3, 4)
+            });
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "a", "b", "d"},
+        new int[] {0, 0, 1, 3},
+        new int[] {1, 1, 2, 4},
+        new int[] {1, 0, 1, 2},
+        new int[] {3, 1, 1, 1},
+        4);
+  }
+
+  // Check how multiple holes in a row are preserved.
+  public void testLongHole() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            28,
+            new Token[] {
+              token("hello", 1, 1, 0, 5), token("hole", 5, 1, 20, 24), token("fun", 1, 1, 25, 28),
+            });
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"hello", "hole", "fun"},
+        new int[] {0, 20, 25},
+        new int[] {5, 24, 28},
+        new int[] {1, 2, 1},
+        new int[] {1, 1, 1},
+        28);
+  }
+
+  // Multiple nodes missing in the alt path.
+  // asserts disabled = nothing
+  // asserts enabled = AssertionError
+  public void testAltPathLastStepLongHole() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            4,
+            new Token[] {token("abc", 1, 3, 0, 3), token("a", 0, 1, 0, 1), token("d", 3, 1, 3, 4)});
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "a", "d"},
+        new int[] {0, 0, 3},
+        new int[] {1, 1, 4},
+        new int[] {1, 0, 2},
+        new int[] {2, 1, 1},
+        4);
+  }
+
+  // LUCENE-8723
+  // The token stream ends without any edge to fix the long edge's output node.
+  // asserts disabled = dropped token
+  // asserts enabled = AssertionError: 2
+  public void testAltPathLastStepHoleWithoutEndToken() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            2,
+            new Token[] {token("abc", 1, 3, 0, 3), token("a", 0, 1, 0, 1), token("b", 1, 1, 1, 2)});
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "a", "b"},
+        new int[] {0, 0, 1},
+        new int[] {1, 1, 2},
+        new int[] {1, 0, 1},
+        new int[] {1, 1, 1},
+        2);
+  }
+
+  // Similar to testAltPathLastStepHoleWithoutEndToken, but instead of no token to trigger
+  // long-path resolution, the next token has no way to reference the long path, so we have to
+  // resolve as if that last token wasn't present.
+  public void testAltPathLastStepHoleFollowedByHole() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            5,
+            new Token[] {token("abc", 1, 3, 0, 3), token("b", 1, 1, 1, 2), token("e", 3, 1, 4, 5)});
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "b", "e"},
+        new int[] {0, 1, 4},
+        new int[] {3, 2, 5},
+        new int[] {1, 1, 2},
+        new int[] {1, 1, 1},
+        5);
+  }
+
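The shingle cases below are easiest to follow by dumping the flattened graph. A fragment, assuming the imports already added in this diff plus the tok(...) helper from the earlier sketch:

    // testShingledGap's graph: "abc" and "cde" overlap their alternate paths.
    TokenStream in = new CannedTokenStream(0, 5,
        tok("abc", 1, 3, 0, 3), tok("a", 0, 1, 0, 1), tok("b", 1, 1, 1, 2),
        tok("cde", 1, 3, 2, 5), tok("d", 1, 1, 3, 4), tok("e", 1, 1, 4, 5));
    Automaton a = new TokenStreamToAutomaton().toAutomaton(new FlattenGraphFilter(in));
    System.out.println(a.toDot()); // render with Graphviz to inspect the flattened paths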
+  // Two shingled long paths pass each other, which gives a flattened graph with tokens backing up
+  // a lot.
+  public void testShingledGap() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            5,
+            new Token[] {
+              token("abc", 1, 3, 0, 3),
+              token("a", 0, 1, 0, 1),
+              token("b", 1, 1, 1, 2),
+              token("cde", 1, 3, 2, 5),
+              token("d", 1, 1, 3, 4),
+              token("e", 1, 1, 4, 5)
+            });
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "a", "d", "b", "cde", "e"},
+        new int[] {0, 0, 3, 3, 4, 4},
+        new int[] {1, 1, 3, 3, 5, 5},
+        new int[] {1, 0, 1, 0, 1, 0},
+        new int[] {1, 1, 1, 1, 1, 1},
+        5);
+  }
+
+  // With shingles, token order may change during flattening.
+  // We need to be careful not to free input nodes if they still have unreleased edges.
+  // with or without asserts = ArrayIndexOutOfBoundsException
+  public void testShingledGapWithHoles() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            5,
+            new Token[] {
+              token("abc", 1, 3, 0, 3),
+              token("b", 1, 1, 1, 2),
+              token("cde", 1, 3, 2, 5),
+              token("d", 1, 1, 3, 4),
+              token("e", 1, 1, 4, 5)
+            });
+
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "d", "b", "cde", "e"},
+        new int[] {0, 3, 3, 4, 4},
+        new int[] {3, 3, 3, 5, 5},
+        new int[] {1, 1, 0, 1, 0},
+        new int[] {1, 1, 1, 1, 1},
+        5);
+  }
+
+  // When the first token is a hole there is no original token to offset from.
+  public void testFirstTokenHole() throws Exception {
+    TokenStream in = new CannedTokenStream(0, 9, new Token[] {token("start", 2, 1, 0, 5)});
+    TokenStream out = new FlattenGraphFilter(in);
+
+    assertTokenStreamContents(
+        out, new String[] {"start"}, new int[] {0}, new int[] {5}, new int[] {2}, new int[] {1}, 9);
+  }
+
+  // The shingled token starts from a hole.
+  // Hole recovery will cause the shingled token to start later in the output than its alternate
+  // paths.
+  // This will result in it being released too early.
+  public void testShingleFromGap() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            9,
+            new Token[] {
+              token("a", 1, 1, 4, 8),
+              token("abc", 0, 3, 4, 7),
+              token("cd", 2, 2, 6, 8),
+              token("d", 1, 1, 7, 8),
+              token("e", 1, 1, 8, 9)
+            });
+    TokenStream out = new FlattenGraphFilter(in);
+    assertTokenStreamContents(
+        out,
+        new String[] {"a", "abc", "d", "cd", "e"},
+        new int[] {4, 4, 7, 7, 8},
+        new int[] {7, 7, 8, 8, 9},
+        new int[] {1, 0, 1, 1, 1},
+        new int[] {1, 1, 2, 1, 1},
+        9);
+  }
+
+  public void testShingledGapAltPath() throws Exception {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            4,
+            new Token[] {
+              token("abc", 1, 3, 0, 3), token("abcd", 0, 4, 0, 4), token("cd", 2, 2, 2, 4),
+            });
+    TokenStream out = new FlattenGraphFilter(in);
+    assertTokenStreamContents(
+        out,
+        new String[] {"abc", "abcd", "cd"},
+        new int[] {0, 0, 2},
+        new int[] {3, 4, 4},
+        new int[] {1, 0, 1},
+        new int[] {1, 2, 1},
+        4);
+  }
+
+  // Lots of shingles and alternate paths connecting to each other. One edge 'c' is missing between
+  // 'ab' and 'def'.
+  public void testHeavilyConnectedGraphWithGap() throws IOException {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            7,
+            new Token[] {
+              token("a", 1, 1, 0, 1),
+              token("ab", 0, 2, 0, 2),
+              token("abcdef", 0, 6, 0, 6),
+              token("abcd", 0, 4, 0, 4),
+              token("bcdef", 1, 5, 1, 7),
+              token("def", 2, 3, 4, 7),
+              token("e", 1, 1, 5, 6),
+              token("f", 1, 1, 6, 7)
+            });
+    TokenStream out = new FlattenGraphFilter(in);
+    assertTokenStreamContents(
+        out,
+        new String[] {"a", "ab", "abcdef", "abcd", "bcdef", "e", "def", "f"},
+        new int[] {0, 0, 0, 0, 5, 5, 6, 6},
+        new int[] {1, 1, 7, 1, 7, 6, 7, 7},
+        new int[] {1, 0, 0, 0, 1, 0, 1, 0},
+        new int[] {1, 1, 3, 1, 2, 1, 1, 1},
+        7);
+  }
+
+  // This graph can create a disconnected input node that is farther ahead in the output than its
+  // subsequent input node.
+  // Exceptions: freed too early or dropped tokens.
+  public void testShingleWithLargeLeadingGap() throws IOException {
+    TokenStream in =
+        new CannedTokenStream(
+            0,
+            6,
+            new Token[] {
+              token("abcde", 1, 5, 0, 5), token("ef", 4, 2, 4, 6), token("f", 1, 1, 5, 6),
+            });
+    TokenStream out = new FlattenGraphFilter(in);
+    assertTokenStreamContents(
+        out,
+        new String[] {"abcde", "f", "ef"},
+        new int[] {0, 5, 5},
+        new int[] {5, 6, 6},
+        new int[] {1, 1, 0},
+        new int[] {1, 1, 1},
+        6);
+  }
+
+  /**
+   * Build a CharsRef containing 2-3 tokens.
+   *
+   * @param tokens vocabulary of tokens
+   * @param charsRefBuilder CharsRefBuilder
+   * @param random Random for selecting tokens
+   * @return CharsRef containing 2-3 tokens
+   */
+  private CharsRef buildMultiTokenCharsRef(
+      String[] tokens, CharsRefBuilder charsRefBuilder, Random random) {
+    int srcLen = random.nextInt(2) + 2;
+    String[] srcTokens = new String[srcLen];
+    for (int pos = 0; pos < srcLen; pos++) {
+      srcTokens[pos] = tokens[random.nextInt(tokens.length)];
+    }
+    SynonymMap.Builder.join(srcTokens, charsRefBuilder);
+    return charsRefBuilder.toCharsRef();
+  }
+
+  // Create a random graph, then delete some edges to see if we can trip up FlattenGraphFilter.
+  public void testRandomGraphs() throws Exception {
+    String[] baseTokens = new String[] {"t1", "t2", "t3", "t4"};
+    String[] synTokens = new String[] {"s1", "s2", "s3", "s4"};
+
+    SynonymMap.Builder mapBuilder = new SynonymMap.Builder();
+    CharsRefBuilder charRefBuilder = new CharsRefBuilder();
+    Random random = random();
+
+    // between 10 and 20 synonym entries
+    int synCount = random.nextInt(10) + 10;
+    for (int i = 0; i < synCount; i++) {
+      int type = random.nextInt(4);
+      CharsRef src;
+      CharsRef dest;
+      switch (type) {
+        case 0:
+          // 1:1
+          src = charRefBuilder.append(baseTokens[random.nextInt(baseTokens.length)]).toCharsRef();
+          charRefBuilder.clear();
+          dest = charRefBuilder.append(synTokens[random.nextInt(synTokens.length)]).toCharsRef();
+          charRefBuilder.clear();
+          break;
+        case 1:
+          // many:1
+          src = buildMultiTokenCharsRef(baseTokens, charRefBuilder, random);
+          charRefBuilder.clear();
+          dest = charRefBuilder.append(synTokens[random.nextInt(synTokens.length)]).toCharsRef();
+          charRefBuilder.clear();
+          break;
+        case 2:
+          // 1:many
+          src = charRefBuilder.append(baseTokens[random.nextInt(baseTokens.length)]).toCharsRef();
+          charRefBuilder.clear();
+          dest = buildMultiTokenCharsRef(synTokens, charRefBuilder, random);
+          charRefBuilder.clear();
+          break;
+        default:
+          // many:many
+          src = buildMultiTokenCharsRef(baseTokens, charRefBuilder, random);
+          charRefBuilder.clear();
+          dest = buildMultiTokenCharsRef(synTokens, charRefBuilder, random);
+          charRefBuilder.clear();
+      }
+      mapBuilder.add(src, dest, true);
+    }
+
+    SynonymMap synMap = mapBuilder.build();
+
+    int stopWordCount = random.nextInt(4) + 1;
+    CharArraySet stopWords = new CharArraySet(stopWordCount, true);
+    while (stopWords.size() < stopWordCount) {
+      int index = random.nextInt(baseTokens.length + synTokens.length);
+      String[] tokenArray = baseTokens;
+      if (index >= baseTokens.length) {
+        index -= baseTokens.length;
+        tokenArray = synTokens;
+      }
+      stopWords.add(tokenArray[index]);
+    }
+
+    Analyzer withFlattenGraph =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            Tokenizer in = new WhitespaceTokenizer();
+            TokenStream result = new SynonymGraphFilter(in, synMap, true);
+            result = new StopFilter(result, stopWords);
+            result = new FlattenGraphFilter(result);
+            return new TokenStreamComponents(in, result);
+          }
+        };
+
+    int tokenCount = random.nextInt(20) + 20;
+    List<String> stringTokens = new ArrayList<>();
+    while (stringTokens.size() < tokenCount) {
+      stringTokens.add(baseTokens[random.nextInt(baseTokens.length)]);
+    }
+
+    String text = String.join(" ", stringTokens);
+    // FlattenGraphFilter can create inconsistent offsets.
+    // If that is resolved we can check offsets too.
+    // Until then, converting to an automaton will pull the text through and check if we hit asserts.
+    // checkAnalysisConsistency(random, withFlattenGraph, false, text);
+    TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();
+    TokenStream flattenedTokenStream = withFlattenGraph.tokenStream("field", text);
+    assertFalse(Operations.hasDeadStates(tsta.toAutomaton(flattenedTokenStream)));
+    flattenedTokenStream.close();
+
+    /*
+    checkGeneralization can get VERY slow, as matching holes to tokens or other holes generates a lot of potentially valid paths.
+    Analyzer withoutFlattenGraph =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            Tokenizer in = new WhitespaceTokenizer();
+            TokenStream result = new SynonymGraphFilter(in, synMap, true);
+            result = new StopFilter(result, stopWords);
+            return new TokenStreamComponents(in, result);
+          }
+        };
+    checkGeneralization(
+        withFlattenGraph.tokenStream("field", text),
+        withoutFlattenGraph.tokenStream("field", text));
+    */
+  }
+
+  /*
+   * Make some strings, make an automaton that accepts those strings, convert that automaton into a
+   * TokenStream, flatten it, convert it back into an automaton, and see if the original strings are
+   * still accepted.
+   */
+  public void testPathsNotLost() throws IOException {
+    int wordCount = random().nextInt(5) + 5;
+    List<BytesRef> acceptStrings = new LinkedList<>();
+    for (int i = 0; i < wordCount; i++) {
+      int wordLen = random().nextInt(5) + 5;
+      BytesRef ref = new BytesRef(wordLen);
+      ref.length = wordLen;
+      ref.offset = 0;
+      for (int j = 0; j < wordLen; j++) {
+        ref.bytes[j] = (byte) (random().nextInt(5) + 65);
+      }
+      acceptStrings.add(ref);
+    }
+    acceptStrings.sort(Comparator.naturalOrder());
+
+    acceptStrings = acceptStrings.stream().limit(wordCount).collect(Collectors.toList());
+    Automaton nonFlattenedAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+
+    TokenStream ts = AutomatonToTokenStream.toTokenStream(nonFlattenedAutomaton);
+    TokenStream flattenedTokenStream = new FlattenGraphFilter(ts);
+    TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();
+    Automaton flattenedAutomaton = tsta.toAutomaton(flattenedTokenStream);
+
+    // TokenStreamToAutomaton adds position-increment transitions into the automaton.
+    List<BytesRef> acceptStringsWithPosSep = createAcceptStringsWithPosSep(acceptStrings);
+
+    for (BytesRef acceptString : acceptStringsWithPosSep) {
+      assertTrue(
+          "string not accepted " + acceptString.utf8ToString(),
+          recursivelyValidate(acceptString, 0, 0, flattenedAutomaton));
+    }
+  }
+
+  /**
+   * Adds POS_SEP bytes between token bytes to match the TokenStreamToAutomaton format.
+   *
+   * @param acceptStrings byte refs of accepted strings; each byte is a transition
+   * @return list of BytesRefs where each byte is separated by a POS_SEP byte
+   */
+  private List<BytesRef> createAcceptStringsWithPosSep(List<BytesRef> acceptStrings) {
+    List<BytesRef> acceptStringsWithPosSep = new ArrayList<>();
+    for (BytesRef acceptString : acceptStrings) {
+      BytesRef withPosSep = new BytesRef(acceptString.length * 2 - 1);
+      withPosSep.length = acceptString.length * 2 - 1;
+      withPosSep.offset = 0;
+      for (int i = 0; i < acceptString.length; i++) {
+        withPosSep.bytes[i * 2] = acceptString.bytes[i];
+        if (i * 2 + 1 < withPosSep.length) {
+          withPosSep.bytes[i * 2 + 1] = TokenStreamToAutomaton.POS_SEP;
+        }
+      }
+      acceptStringsWithPosSep.add(withPosSep);
+    }
+    return acceptStringsWithPosSep;
+  }
+
+  /**
+   * Checks if acceptString is accepted by the automaton. The automaton may be an NFA.
+   *
+   * @param acceptString string to test
+   * @param acceptStringIndex current index into acceptString; initial value should be 0
+   * @param state state to transition from; initial value should be 0
+   * @param automaton automaton to test
+   * @return true if acceptString is accepted by the automaton, otherwise false
+   */
+  public boolean recursivelyValidate(
+      BytesRef acceptString, int acceptStringIndex, int state, Automaton automaton) {
+    if (acceptStringIndex == acceptString.length) {
+      return automaton.isAccept(state);
+    }
+
+    Transition transition = new Transition();
+    automaton.initTransition(state, transition);
+    int numTransitions = automaton.getNumTransitions(state);
+    boolean accept = false;
+    // The automaton can be an NFA, so we need to check all matching transitions.
+    for (int i = 0; i < numTransitions; i++) {
+      automaton.getTransition(state, i, transition);
+      if (transition.min <= acceptString.bytes[acceptStringIndex]
+          && transition.max >= acceptString.bytes[acceptStringIndex]) {
+        accept =
+            recursivelyValidate(acceptString, acceptStringIndex + 1, transition.dest, automaton);
+      }
+      if (accept == true) {
+        break;
+      }
+    }
+    return accept;
+  }
+
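recursivelyValidate must chase every transition whose range covers the next byte because the automaton may be an NFA; a greedy walk that commits to the first match can dead-end and wrongly reject. A fragment that could sit in this test class, with hypothetical states (same Automaton.Builder usage as the new cycle tests at the end of this diff):

    Automaton.Builder b = new Automaton.Builder();
    int start = b.createState(); // state 0 is the start state
    int m1 = b.createState();
    int m2 = b.createState();
    int accept = b.createState();
    b.addTransition(start, m1, 'a'); // this 'a' branch dead-ends
    b.addTransition(start, m2, 'a');
    b.addTransition(m2, accept, 'b');
    b.setAccept(accept, true);
    Automaton nfa = b.finish();
    // Only start->m2->accept accepts "ab"; checking every matching transition finds it.
    assertTrue(recursivelyValidate(new BytesRef("ab"), 0, 0, nfa));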
+  /**
+   * This method checks that strings that lead to the accept state of the non-flattened TokenStream
+   * also lead to the accept state in the flattened TokenStream. This gets complicated when you
+   * factor in holes. The FlattenGraphFilter will remove alternate paths that are made entirely of
+   * holes. An alternate path of holes is indistinguishable from a path that just has long
+   * lengths (ex: testStrangelyNumberedNodes). Also, alternate paths that end in multiple holes
+   * could be interpreted as sequential holes after the branching has converged during flattening.
+   * This leads to a lot of weird logic about navigating around holes that may compromise the
+   * accuracy of this test.
+   *
+   * @param flattened flattened TokenStream
+   * @param notFlattened not flattened TokenStream
+   * @throws IOException on error creating Automata
+   */
+  /* private void checkGeneralization(TokenStream flattened, TokenStream notFlattened)
+      throws IOException {
+    TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();
+
+    List<LinkedList<Integer>> acceptStrings = getAcceptStrings(tsta.toAutomaton(notFlattened));
+    checkAcceptStrings(acceptStrings, tsta.toAutomaton(flattened));
+    flattened.close();
+    notFlattened.close();
+  }*/
+
+  /**
+   * Gets up to 10000 strings that lead to an accept state in the given automaton.
+   *
+   * @param automaton automaton
+   * @return list of accept sequences
+   */
+  /* private List<LinkedList<Integer>> getAcceptStrings(Automaton automaton) {
+    List<LinkedList<Integer>> acceptedSequences = new LinkedList<>();
+    LinkedList<Integer> prefix = new LinkedList<>();
+    // state 0 is always the start node
+    // Particularly branching automatons can create lots of possible acceptable strings. Limit to
+    // the first 10K.
+    buildAcceptStringRecursive(automaton, 0, prefix, acceptedSequences, 10000);
+    return acceptedSequences;
+  }*/
+
+  /**
+   * @param automaton automaton to generate strings from
+   * @param state state to start at
+   * @param prefix string prefix
+   * @param acceptedSequences list of strings built so far
+   * @param limit maximum number of acceptedSequences
+   */
+  /*private void buildAcceptStringRecursive(
+      Automaton automaton,
+      int state,
+      LinkedList<Integer> prefix,
+      List<LinkedList<Integer>> acceptedSequences,
+      int limit) {
+    if (acceptedSequences.size() == limit) {
+      return;
+    }
+    if (automaton.isAccept(state)) {
+      acceptedSequences.add(new LinkedList<>(prefix));
+      return;
+    }
+    int numTransitions = automaton.getNumTransitions(state);
+    Transition transition = new Transition();
+    for (int i = 0; i < numTransitions; i++) {
+      automaton.getTransition(state, i, transition);
+      // min and max are the same for transitions made by TokenStreamToAutomaton
+      prefix.addLast(transition.min);
+      buildAcceptStringRecursive(automaton, transition.dest, prefix, acceptedSequences, limit);
+      prefix.removeLast();
+    }
+  }
+
+  private void checkAcceptStrings(List<LinkedList<Integer>> acceptSequence, Automaton automaton) {
+    for (LinkedList<Integer> acceptString : acceptSequence) {
+      assertTrue(
+          "String did not lead to accept state " + acceptString,
+          recursivelyValidateWithHoles(acceptString, 0, automaton));
+    }
+  }
+
+  private boolean recursivelyValidateWithHoles(
+      LinkedList<Integer> acceptSequence, int state, Automaton automaton) {
+    if (acceptSequence.isEmpty()) {
+      return automaton.isAccept(state);
+    }
+
+    Integer curr = acceptSequence.pop();
+    int numTransitions = automaton.getNumTransitions(state);
+    Transition transition = new Transition();
+
+    boolean accept = false;
+    // The automaton can be an NFA, so we need to check all matching transitions.
+    for (int i = 0; i < numTransitions; i++) {
+      automaton.getTransition(state, i, transition);
+      if (transition.min <= curr && transition.max >= curr) {
+        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
+        // Factoring in flattened graphs, the space covered by a hole may be bigger in the
+        // flattened graph.
+        // Try consuming more steps with holes.
+        if (accept == false
+            && transition.min == TokenStreamToAutomaton.HOLE
+            && transition.max == TokenStreamToAutomaton.HOLE) {
+          acceptSequence.push(TokenStreamToAutomaton.HOLE);
+          acceptSequence.push(TokenStreamToAutomaton.POS_SEP);
+          accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
+          acceptSequence.pop();
+          acceptSequence.pop();
+        }
+      } else if (transition.min == TokenStreamToAutomaton.HOLE
+          && transition.max == TokenStreamToAutomaton.HOLE
+          && automaton.getNumTransitions(transition.dest) > 0) {
+        // consume multiple holes in the automaton
+        // clear POS_INC
+        automaton.getTransition(transition.dest, 0, transition);
+        acceptSequence.push(curr);
+        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
+        acceptSequence.pop();
+      } else if (curr == TokenStreamToAutomaton.HOLE) {
+        // consume non-holes in the automaton with holes
+        while (transition.min != TokenStreamToAutomaton.POS_SEP
+            && automaton.getNumTransitions(transition.dest) > 0) {
+          automaton.getTransition(transition.dest, 0, transition);
+        }
+        acceptSequence.push(curr);
+        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
+        acceptSequence.pop();
+      }
+      if (accept) {
+        break;
+      }
+    }
+    // FlattenGraphFilter will remove side paths that are only holes, and gaps may also change size
+    // as the graph is flattened.
+    // Traverse over them if curr is a hole, to make sure the gap is kept.
+    if (accept == false && curr == TokenStreamToAutomaton.HOLE && acceptSequence.size() > 0) {
+      // get rid of the separator
+      acceptSequence.pop();
+
+      for (int i = 0; i < numTransitions; i++) {
+        automaton.getTransition(state, i, transition);
+        // advance to the next POS_SEP in the automaton
+        while (transition.min != TokenStreamToAutomaton.POS_SEP
+            && automaton.getNumTransitions(transition.dest) > 0) {
+          automaton.getTransition(transition.dest, 0, transition);
+        }
+        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
+        if (accept) {
+          break;
+        }
+      }
+
+      // There might be multiple holes squashed under a one-step path. Try burning the remaining
+      // holes.
+      if (accept == false) {
+        accept = recursivelyValidateWithHoles(acceptSequence, state, automaton);
+      }
+
+      acceptSequence.push(TokenStreamToAutomaton.POS_SEP);
+    }
+    acceptSequence.push(curr);
+    return accept;
+  } */
+
   // NOTE: TestSynonymGraphFilter's testRandomSyns also tests FlattenGraphFilter
 }
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/AutomatonToTokenStream.java b/lucene/core/src/java/org/apache/lucene/analysis/AutomatonToTokenStream.java
new file mode 100644
index 000000000000..ef1bbd20bea5
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/AutomatonToTokenStream.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.Transition;
+
+/** Converts an Automaton into a TokenStream. */
+public class AutomatonToTokenStream {
+
+  private AutomatonToTokenStream() {}
+
+  /**
+   * Converts an automaton into a TokenStream. This is done by first topologically sorting the
+   * states in the Automaton. States that have the same distance from the start are grouped
+   * together to form the position nodes for the TokenStream. The resulting TokenStream releases
+   * edges from the automaton as tokens, in order from the position nodes. This requires that the
+   * automaton be a finite DAG.
+   *
+   * @param automaton automaton to convert. Must be a finite DAG.
+   * @return TokenStream representation of the automaton
+   */
+  public static TokenStream toTokenStream(Automaton automaton) {
+    if (Operations.isFinite(automaton) == false) {
+      throw new IllegalArgumentException("Automaton must be finite");
+    }
+
+    List<List<Integer>> positionNodes = new ArrayList<>();
+
+    Transition[][] transitions = automaton.getSortedTransitions();
+
+    int[] indegree = new int[transitions.length];
+
+    for (int i = 0; i < transitions.length; i++) {
+      for (int edge = 0; edge < transitions[i].length; edge++) {
+        indegree[transitions[i][edge].dest] += 1;
+      }
+    }
+    if (indegree[0] != 0) {
+      throw new IllegalArgumentException("Start node has incoming edges, creating a cycle");
+    }
+
+    LinkedList<RemapNode> noIncomingEdges = new LinkedList<>();
+    Map<Integer, Integer> idToPos = new HashMap<>();
+    noIncomingEdges.addLast(new RemapNode(0, 0));
+    while (noIncomingEdges.isEmpty() == false) {
+      RemapNode currState = noIncomingEdges.removeFirst();
+      for (int i = 0; i < transitions[currState.id].length; i++) {
+        indegree[transitions[currState.id][i].dest] -= 1;
+        if (indegree[transitions[currState.id][i].dest] == 0) {
+          noIncomingEdges.addLast(
+              new RemapNode(transitions[currState.id][i].dest, currState.pos + 1));
+        }
+      }
+      if (positionNodes.size() == currState.pos) {
+        List<Integer> posIncs = new ArrayList<>();
+        posIncs.add(currState.id);
+        positionNodes.add(posIncs);
+      } else {
+        positionNodes.get(currState.pos).add(currState.id);
+      }
+      idToPos.put(currState.id, currState.pos);
+    }
+
+    for (int i = 0; i < indegree.length; i++) {
+      if (indegree[i] != 0) {
+        throw new IllegalArgumentException("Cycle found in automaton");
+      }
+    }
+
+    List<List<EdgeToken>> edgesByLayer = new ArrayList<>();
+    for (List<Integer> layer : positionNodes) {
+      List<EdgeToken> edges = new ArrayList<>();
+      for (int state : layer) {
+        for (Transition t : transitions[state]) {
+          // Each edge in the token stream can only be one value, though a transition covers a range.
+          for (int val = t.min; val <= t.max; val++) {
+            int destLayer = idToPos.get(t.dest);
+            edges.add(new EdgeToken(destLayer, val));
+            // If there's an intermediate accept state, add an edge to the terminal state.
+            if (automaton.isAccept(t.dest) && destLayer != positionNodes.size() - 1) {
+              edges.add(new EdgeToken(positionNodes.size() - 1, val));
+            }
+          }
+        }
+      }
+      edgesByLayer.add(edges);
+    }
+
+    return new TopoTokenStream(edgesByLayer);
+  }
+
+  /** TokenStream that outputs tokens from a topologically sorted graph. */
+  private static class TopoTokenStream extends TokenStream {
+
+    private final List<List<EdgeToken>> edgesByPos;
+    private int currentPos;
+    private int currentEdgeIndex;
+    private CharTermAttribute charAttr = addAttribute(CharTermAttribute.class);
+    private PositionIncrementAttribute incAttr = addAttribute(PositionIncrementAttribute.class);
+    private PositionLengthAttribute lenAttr = addAttribute(PositionLengthAttribute.class);
+    private OffsetAttribute offAttr = addAttribute(OffsetAttribute.class);
+
+    public TopoTokenStream(List<List<EdgeToken>> edgesByPos) {
+      this.edgesByPos = edgesByPos;
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      clearAttributes();
+      while (currentPos < edgesByPos.size()
+          && currentEdgeIndex == edgesByPos.get(currentPos).size()) {
+        currentEdgeIndex = 0;
+        currentPos += 1;
+      }
+      if (currentPos == edgesByPos.size()) {
+        return false;
+      }
+      EdgeToken currentEdge = edgesByPos.get(currentPos).get(currentEdgeIndex);
+
+      charAttr.append((char) currentEdge.value);
+
+      incAttr.setPositionIncrement(currentEdgeIndex == 0 ? 1 : 0);
+
+      lenAttr.setPositionLength(currentEdge.destination - currentPos);
+
+      offAttr.setOffset(currentPos, currentEdge.destination);
+
+      currentEdgeIndex++;
+
+      return true;
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      clearAttributes();
+      currentPos = 0;
+      currentEdgeIndex = 0;
+    }
+
+    @Override
+    public void end() throws IOException {
+      clearAttributes();
+      incAttr.setPositionIncrement(0);
+      // -1 because we don't count the terminal state as a position in the TokenStream
+      offAttr.setOffset(edgesByPos.size() - 1, edgesByPos.size() - 1);
+    }
+  }
+
+  /** Edge between position nodes. These edges will be output as tokens in the TokenStream. */
+  private static class EdgeToken {
+    public final int destination;
+    public final int value;
+
+    public EdgeToken(int destination, int value) {
+      this.destination = destination;
+      this.value = value;
+    }
+  }
+
+  /** Node that contains the original node id and its position in the TokenStream. */
+  private static class RemapNode {
+    public final int id;
+    public final int pos;
+
+    public RemapNode(int id, int pos) {
+      this.id = id;
+      this.pos = pos;
+    }
+  }
+}
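Between the implementation and its tests, a minimal usage sketch of the new API. It assumes only classes already imported in this diff; "abc"/"abd" are arbitrary sorted inputs:

    List<BytesRef> words = new ArrayList<>();
    words.add(new BytesRef("abc"));
    words.add(new BytesRef("abd")); // DaciukMihovAutomatonBuilder requires sorted input
    Automaton a = DaciukMihovAutomatonBuilder.build(words);
    TokenStream ts = AutomatonToTokenStream.toTokenStream(a);
    // "a" and "b" each get their own position; "c" and "d" then stack at the final
    // position (posInc 1 then 0), mirroring testForkedPath below.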
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
new file mode 100644
index 000000000000..369856eaf89f
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+
+public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase {
+
+  public void testSinglePath() throws IOException {
+    List<BytesRef> acceptStrings = new ArrayList<>();
+    acceptStrings.add(new BytesRef("abc"));
+
+    Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+    TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
+    assertTokenStreamContents(
+        ts,
+        new String[] {"a", "b", "c"},
+        new int[] {0, 1, 2},
+        new int[] {1, 2, 3},
+        new int[] {1, 1, 1},
+        new int[] {1, 1, 1},
+        3);
+  }
+
+  public void testParallelPaths() throws IOException {
+    List<BytesRef> acceptStrings = new ArrayList<>();
+    acceptStrings.add(new BytesRef("123"));
+    acceptStrings.add(new BytesRef("abc"));
+
+    Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+    TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
+    assertTokenStreamContents(
+        ts,
+        new String[] {"1", "a", "2", "b", "3", "c"},
+        new int[] {0, 0, 1, 1, 2, 2},
+        new int[] {1, 1, 2, 2, 3, 3},
+        new int[] {1, 0, 1, 0, 1, 0},
+        new int[] {1, 1, 1, 1, 1, 1},
+        3);
+  }
+
+  public void testForkedPath() throws IOException {
+    List<BytesRef> acceptStrings = new ArrayList<>();
+    acceptStrings.add(new BytesRef("ab3"));
+    acceptStrings.add(new BytesRef("abc"));
+
+    Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+    TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
+    assertTokenStreamContents(
+        ts,
+        new String[] {"a", "b", "3", "c"},
+        new int[] {0, 1, 2, 2},
+        new int[] {1, 2, 3, 3},
+        new int[] {1, 1, 1, 0},
+        new int[] {1, 1, 1, 1},
+        3);
+  }
+
+  public void testNonDeterministicGraph() throws IOException {
+    Automaton.Builder builder = new Automaton.Builder();
+    int start = builder.createState();
+    int middle1 = builder.createState();
+    int middle2 = builder.createState();
+    int accept = builder.createState();
+
+    builder.addTransition(start, middle1, 'a');
+    builder.addTransition(start, middle2, 'a');
+    builder.addTransition(middle1, accept, 'b');
+    builder.addTransition(middle2, accept, 'c');
+    builder.setAccept(accept, true);
+
+    Automaton nfa = builder.finish();
+    TokenStream ts = AutomatonToTokenStream.toTokenStream(nfa);
+    assertTokenStreamContents(
+        ts,
+        new String[] {"a", "a", "b", "c"},
+        new int[] {0, 0, 1, 1},
+        new int[] {1, 1, 2, 2},
+        new int[] {1, 0, 1, 0},
+        new int[] {1, 1, 1, 1},
+        2);
+  }
+
+  public void testGraphWithStartNodeCycle() {
+    Automaton.Builder builder = new Automaton.Builder();
+    int start = builder.createState();
+    int middle = builder.createState();
+    int accept = builder.createState();
+
+    builder.addTransition(start, middle, 'a');
+    builder.addTransition(middle, accept, 'b');
+    builder.addTransition(middle, start, '1');
+
+    builder.setAccept(accept, true);
+
+    Automaton cycleGraph = builder.finish();
+    expectThrows(
+        IllegalArgumentException.class, () -> AutomatonToTokenStream.toTokenStream(cycleGraph));
+  }
+
+  public void testGraphWithNonStartCycle() {
+    Automaton.Builder builder = new Automaton.Builder();
+    int start = builder.createState();
+    int middle = builder.createState();
+    int accept = builder.createState();
+
+    builder.addTransition(start, middle, 'a');
+    builder.addTransition(middle, accept, 'b');
+    builder.addTransition(accept, middle, 'c');
+    builder.setAccept(accept, true);
+
+    Automaton cycleGraph = builder.finish();
+    expectThrows(
+        IllegalArgumentException.class, () -> AutomatonToTokenStream.toTokenStream(cycleGraph));
+  }
+}
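The two cycle tests rely on the indegree bookkeeping in toTokenStream: a state whose indegree never reaches zero is never released by the topological BFS, which can only happen on or behind a cycle. A generic sketch of that check with a hypothetical hasCycle helper (the patch itself seeds only the start state and then verifies every indegree hit zero; this sketch seeds the whole zero-indegree frontier):

    // Kahn's algorithm: adj[s] lists the destination states of s's transitions.
    static boolean hasCycle(int[][] adj) {
      int[] indegree = new int[adj.length];
      for (int[] edges : adj) {
        for (int dest : edges) {
          indegree[dest]++;
        }
      }
      java.util.ArrayDeque<Integer> frontier = new java.util.ArrayDeque<>();
      for (int s = 0; s < adj.length; s++) {
        if (indegree[s] == 0) {
          frontier.add(s);
        }
      }
      int released = 0;
      while (!frontier.isEmpty()) {
        int s = frontier.poll();
        released++;
        for (int dest : adj[s]) {
          if (--indegree[dest] == 0) {
            frontier.add(dest);
          }
        }
      }
      return released < adj.length; // unreleased states => a cycle exists
    }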