diff --git a/Description.props b/Description.props index b193c93..6fe2408 100644 --- a/Description.props +++ b/Description.props @@ -4,10 +4,10 @@ PackageName=SCStemmers # Version (required) -Version=1.1.0 +Version=1.1.1 # Date -Date=2018-02-24 +Date=2018-10-16 # Title (required) Title=A collection of stemmers for Serbian and Croatian. @@ -27,7 +27,7 @@ License=GPL 3.0 Description=This package contains Java implementations of three previously published stemmers for Serbian - two of them by Keselj and Sipka, one by Milosevic - and one for Croatian by Ljubesic and Pandzic. All stemmers require the input text to be in UTF-8. The stemmers accept text in both the Cyrillic and Latin scripts as input, and give the output in the Latin script. Performance comparisons between the stemmers (on the task of sentiment analysis) can be found in the paper "Reliable Baselines for Sentiment Analysis in Resource-Limited Languages: The Serbian Movie Review Dataset," Vuk Batanovic, Bosko Nikolic, Milan Milosavljevic, in Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016), pp. 2688-2696, Portoroz, Slovenia (2016). See the webpage for the list of reference papers and more information. # Package URL for obtaining the package archive (required) -PackageURL=https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.0/SCStemmers_1.1.0.zip +PackageURL=https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.1/SCStemmers_1.1.1.zip # URL for further information URL=https://github.com/vukbatanovic/SCStemmers/ diff --git a/README.md b/README.md index 6795308..73cd565 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ public void stemFile (String fileInput, String fileOutput) ``` ### Command-line interface -The supplied [SCStemmers.jar](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.0/SCStemmers.jar) file makes it possible to stem the contents of textual files using the command line. Stemmers from the SCStemmers package can be invoked by the following command: +The supplied [SCStemmers.jar](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.1/SCStemmers.jar) file makes it possible to stem the contents of textual files using the command line. Stemmers from the SCStemmers package can be invoked by the following command: ``` java -jar SCStemmers.jar StemmerID InputFile OutputFile ``` @@ -50,7 +50,7 @@ where *StemmerID* is a number identifying the stemming algorithm: ### Weka Alternatively, the stemmers can be utilized as an unofficial plug-in module within Weka (Waikato Environment for Knowledge Analysis). -To do so, download the [SCStemmers Weka package](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.0/SCStemmers_1.1.0.zip). +To do so, download the [SCStemmers Weka package](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.1/SCStemmers_1.1.1.zip). Open the Weka package manager (available in Weka >= 3.7) and use the "Unofficial - File/URL" option to select and install SCStemmers. After restarting Weka, the list of available stemmers (within the StringToWordVector filter) will also contain the four stemmers from this package. diff --git a/src/weka/core/stemmers/LjubesicPandzicStemmer.java b/src/weka/core/stemmers/LjubesicPandzicStemmer.java index 301fc30..d7141f0 100644 --- a/src/weka/core/stemmers/LjubesicPandzicStemmer.java +++ b/src/weka/core/stemmers/LjubesicPandzicStemmer.java @@ -128,7 +128,7 @@ public String stemLine(String line) { private String transform (String word) { for (String key: transformations.keySet()) if (word.endsWith(key)) - return word.replace(key, transformations.get(key)); + return word.substring(0, word.length()-key.length()) + transformations.get(key); return word; }