Skip to content

Commit

Permalink
LUCENE-7606: Normalization with CustomAnalyzer would only apply the l…
Browse files Browse the repository at this point in the history
…ast token filter.
  • Loading branch information
jpountz committed Dec 30, 2016
1 parent 3ccd15a commit 26ee8e9
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 1 deletion.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ Bug Fixes
using helpers for exclusive bounds that are consistent with Double.compare.
(Adrien Grand, Dawid Weiss)

* LUCENE-7606: Normalization with CustomAnalyzer would only apply the last
token filter. (Adrien Grand)

Improvements

* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ protected TokenStream normalize(String fieldName, TokenStream in) {
for (TokenFilterFactory filter : tokenFilters) {
if (filter instanceof MultiTermAwareComponent) {
filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
result = filter.create(in);
result = filter.create(result);
}
}
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
Expand Down Expand Up @@ -479,4 +480,24 @@ public void testNormalization() throws IOException {
assertEquals(new BytesRef("2A"), analyzer2.normalize("dummy", "0À"));
}

// Regression test for LUCENE-7606: normalize() used to chain each filter onto the
// original input instead of the previous filter's output, so only the LAST
// multi-term-aware token filter was applied.
public void testNormalizationWithMultipleTokenFilters() throws IOException {
CustomAnalyzer analyzer = CustomAnalyzer.builder()
// Both filters are multi-term aware (MultiTermAwareComponent), so
// normalization must apply BOTH: lowercasing AND ASCII folding.
.withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
.addTokenFilter(LowerCaseFilterFactory.class, Collections.emptyMap())
.addTokenFilter(ASCIIFoldingFilterFactory.class, Collections.emptyMap())
.build();
// "À B é" -> lowercase -> "à b é" -> ASCII fold -> "a b e": proves both filters ran.
assertEquals(new BytesRef("a b e"), analyzer.normalize("dummy", "À B é"));
}

// Regression test for LUCENE-7606 (char-filter side): normalization must chain
// ALL multi-term-aware char filters, not just the last one registered.
// Fixes: method-name typo ("Multipl" -> "Multiple"; the "test" prefix used for
// discovery is preserved) and the copy-pasted comment that wrongly claimed the
// components are not multi-term aware.
public void testNormalizationWithMultipleCharFilters() throws IOException {
  CustomAnalyzer analyzer = CustomAnalyzer.builder()
      // Both char filters are multi-term aware, so normalization must apply BOTH:
      // mapping1.txt maps "a" => "e", mapping2.txt maps "b" => "f".
      .withTokenizer(WhitespaceTokenizerFactory.class, Collections.emptyMap())
      .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping1.txt")))
      .addCharFilter(MappingCharFilterFactory.class, new HashMap<>(Collections.singletonMap("mapping", "org/apache/lucene/analysis/custom/mapping2.txt")))
      .build();
  // "a b c" -> first filter -> "e b c" -> second filter -> "e f c": proves both ran.
  assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c"));
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"a" => "e"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"b" => "f"

0 comments on commit 26ee8e9

Please sign in to comment.