diff --git a/.github/workflows/gradle-jar.yml b/.github/workflows/gradle-jar.yml index 658c5e0..d5eeea0 100644 --- a/.github/workflows/gradle-jar.yml +++ b/.github/workflows/gradle-jar.yml @@ -29,7 +29,7 @@ jobs: - name: Build run: | cd repo - ./gradlew jar + ./gradlew jar sourcesJar javadocJar - name: Release uses: softprops/action-gh-release@v2 diff --git a/.gitignore b/.gitignore index f93e71b..c5a7e81 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ .gradle .vscode *.code-workspace +lib/bin # Ignore Gradle build output directory build diff --git a/README.rst b/README.rst index 4559c3e..70768df 100644 --- a/README.rst +++ b/README.rst @@ -7,9 +7,23 @@ This project contains: - a filter that converts roman numerals into arabic ones, and - a value source that correctly sorts strings with numbers. -The Latin stemmer uses an algorithm by Schinke et al. +Latin Stem Filter +----------------- -.. seealso:: +Usage example in :code:`schema.xml`: + +.. code-block:: xml + + + + + + + + + +The stemmer uses an algorithm by Schinke et al. See: Schinke R, Greengrass M, Robertson AM and Willett P (1996) :title:`A stemming algorithm for Latin text databases.` @@ -17,7 +31,28 @@ The Latin stemmer uses an algorithm by Schinke et al. https://snowballstem.org/otherapps/schinke/ -The filter will convert roman "XLII" to arabic "42". + +Roman Numerals Filter +--------------------- + +The filter will convert roman :code:`XLII` to arabic :code:`42`. + +Usage example in :code:`schema.xml`: + +.. code-block:: xml + + + + + + + + + + +Sorting Value Source +-------------------- The value source generates a string that can be used as a key to sort strings correctly like this: @@ -29,3 +64,17 @@ instead of alphabetically, like this: #. paris-bn-lat-10528 #. paris-bn-lat-4638 + +Usage example in :code:`solrconfig.xml`: + +.. code-block:: xml + + + + ... 
+ + +In the query, set the :code:`sort` parameter to: :code:`strnumsort(my_alphanum_id) asc` diff --git a/lib/build.gradle.kts b/lib/build.gradle.kts index 152b493..dde40b2 100644 --- a/lib/build.gradle.kts +++ b/lib/build.gradle.kts @@ -29,15 +29,26 @@ dependencies { testImplementation("org.apache.solr:solr-test-framework:9.6.1") } -// Apply a specific Java toolchain to ease working on different environments. +base { + archivesName = "capitularia-lucene-tools" +} + java { + withJavadocJar() + withSourcesJar() toolchain { languageVersion = JavaLanguageVersion.of(17) } } -tasks.jar { - archiveBaseName.set("capitularia-lucene-tools") +tasks.withType { + options { + this as StandardJavadocDocletOptions + addStringOption( + "tag", + "lucene.spi:t:SPI Name:" + ) + } } tasks.named("test") { diff --git a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/LatinStemFilter.java b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/LatinStemFilter.java index 8336567..c00baf0 100644 --- a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/LatinStemFilter.java +++ b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/LatinStemFilter.java @@ -18,8 +18,11 @@ * See: {@link LatinStemmer} */ public final class LatinStemFilter extends TokenFilter { + /** The default for the parameter minNounSize */ public static final int DEFAULT_MIN_NOUN_SIZE = 2; + /** The default for the parameter minVerbSize */ public static final int DEFAULT_MIN_VERB_SIZE = 2; + /** The default for the parameter preserveOriginal */ public static final boolean DEFAULT_PRESERVE_ORIGINAL = true; private static final Locale LOCALE = @@ -36,6 +39,10 @@ public final class LatinStemFilter extends TokenFilter { private final Deque stack = new ArrayDeque<>(); private State state; + /** + * Constructor + * @param input The input token stream + */ protected LatinStemFilter(TokenStream input) { this( input, @@ -45,6 +52,13 @@ protected LatinStemFilter(TokenStream input) { ); } + /** + * Constructor + * 
@param input The input token stream + * @param minNounSize The minimum noun size to stem + * @param minVerbSize The minimum verb size to stem + * @param preserveOriginal Preserves the original word if true + */ protected LatinStemFilter( TokenStream input, int minNounSize, diff --git a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/RomanNumeralsFilter.java b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/RomanNumeralsFilter.java index 51bb07b..fc611b2 100644 --- a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/RomanNumeralsFilter.java +++ b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/RomanNumeralsFilter.java @@ -13,6 +13,7 @@ * This filter replaces roman numerals with arabic ones, eg. "XLII" => "42" */ public final class RomanNumeralsFilter extends TokenFilter { + /** The default for the parameter preserveOriginal */ public static final boolean DEFAULT_PRESERVE_ORIGINAL = false; private final CharTermAttribute termAttr; @@ -23,6 +24,10 @@ public final class RomanNumeralsFilter extends TokenFilter { private String original; private State state; + /** + * Constructor + * @param input The input token stream + */ protected RomanNumeralsFilter(TokenStream input) { this( input, @@ -30,6 +35,11 @@ protected RomanNumeralsFilter(TokenStream input) { ); } + /** + * Constructor + * @param input The input token stream + * @param preserveOriginal Preserves the original word if true + */ protected RomanNumeralsFilter(TokenStream input, boolean preserveOriginal) { super(input); this.preserveOriginal = preserveOriginal; diff --git a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSource.java b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSource.java index ff9a6a8..3f95426 100644 --- a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSource.java +++ b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSource.java @@ -30,7 +30,9 @@ 
* */ public final class StringNumberSortValueSource extends SingleFunction { + /** The name of the value source */ public static final String NAME = "strnumsort"; + /** The regex pattern used to find the numbers to convert */ public static final Pattern REGEX = Pattern.compile("\\d+"); /** @@ -46,6 +48,13 @@ protected String name() { return NAME; } + /** + * Prefixes every run of digits with its length, producing a key that sorts correctly. + * @param doc The document + * @param vals The function values + * @return The string with each run of digits prefixed by its length + * @throws IOException If {@code vals.strVal(doc)} fails + */ protected String func(int doc, FunctionValues vals) throws IOException { String s = vals.strVal(doc); return REGEX.matcher(s).replaceAll(mr -> mr.group().length() + mr.group()); diff --git a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSourceParser.java b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSourceParser.java index 836498b..d22d317 100644 --- a/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSourceParser.java +++ b/lib/src/main/java/de/uni_koeln/capitularia/lucene_tools/StringNumberSortValueSourceParser.java @@ -14,6 +14,7 @@ *
  * {@code }
+ * 
*/ public final class StringNumberSortValueSourceParser extends ValueSourceParser { /**