From 6b448c044dcf96beeab77ebc5c6e6a088a37ad23 Mon Sep 17 00:00:00 2001 From: vukbatanovic Date: Sat, 24 Feb 2018 05:32:41 +0100 Subject: [PATCH] Update the documentation to version 1.1.0 --- Description.props | 12 +++--- README.md | 8 ++-- doc/allclasses-frame.html | 4 +- doc/allclasses-noframe.html | 4 +- doc/constant-values.html | 4 +- doc/deprecated-list.html | 4 +- doc/help-doc.html | 4 +- doc/index-files/index-1.html | 8 +++- doc/index-files/index-10.html | 4 +- doc/index-files/index-11.html | 4 +- doc/index-files/index-12.html | 4 +- doc/index-files/index-13.html | 4 +- doc/index-files/index-14.html | 4 +- doc/index-files/index-2.html | 4 +- doc/index-files/index-3.html | 4 +- doc/index-files/index-4.html | 4 +- doc/index-files/index-5.html | 4 +- doc/index-files/index-6.html | 4 +- doc/index-files/index-7.html | 4 +- doc/index-files/index-8.html | 4 +- doc/index-files/index-9.html | 4 +- doc/index.html | 13 +++--- doc/overview-tree.html | 4 +- doc/serialized-form.html | 4 +- .../core/stemmers/KeseljSipkaStemmer.html | 4 +- .../stemmers/KeseljSipkaStemmerGreedy.html | 4 +- .../stemmers/KeseljSipkaStemmerOptimal.html | 4 +- .../core/stemmers/LjubesicPandzicStemmer.html | 40 +++++++++++++++---- doc/weka/core/stemmers/MilosevicStemmer.html | 4 +- doc/weka/core/stemmers/SCStemmer.html | 4 +- doc/weka/core/stemmers/SerbianStemmer.html | 4 +- .../class-use/KeseljSipkaStemmer.html | 4 +- .../class-use/KeseljSipkaStemmerGreedy.html | 4 +- .../class-use/KeseljSipkaStemmerOptimal.html | 4 +- .../class-use/LjubesicPandzicStemmer.html | 4 +- .../stemmers/class-use/MilosevicStemmer.html | 4 +- .../core/stemmers/class-use/SCStemmer.html | 4 +- .../stemmers/class-use/SerbianStemmer.html | 4 +- doc/weka/core/stemmers/package-frame.html | 4 +- doc/weka/core/stemmers/package-summary.html | 4 +- doc/weka/core/stemmers/package-tree.html | 4 +- doc/weka/core/stemmers/package-use.html | 4 +- 42 files changed, 129 insertions(+), 100 deletions(-) diff --git a/Description.props b/Description.props index 850ccf5..b193c93 100644 --- a/Description.props +++ b/Description.props @@ -4,10 +4,10 @@ PackageName=SCStemmers # Version (required) -Version=1.0.0 +Version=1.1.0 # Date -Date=2016-02-15 +Date=2018-02-24 # Title (required) Title=A collection of stemmers for Serbian and Croatian. @@ -15,19 +15,19 @@ Title=A collection of stemmers for Serbian and Croatian. Category=Preprocessing # Author (required) -Author=Vuk Batanovic +Author=Vuk Batanovic # Maintainer (required) -Maintainer=Vuk Batanovic +Maintainer=Vuk Batanovic # License (required) License=GPL 3.0 # Description (required) -Description=This package contains Java implementations of three previously published stemmers for Serbian - two of them by Keselj and Sipka, one by Milosevic - and one for Croatian by Ljubesic and Pandzic. All stemmers require the input text to be in UTF-8. The stemmers for Serbian accept text in both the Cyrillic and Latin scripts as input, and give the output in the Latin script. The stemmer for Croatian works only with texts in the Latin script. Performance comparisons between the stemmers (on the task of sentiment analysis) can be found in the paper "Reliable Baselines for Sentiment Analysis in Resource-Limited Languages: The Serbian Movie Review Dataset," Vuk Batanovic, Bosko Nikolic, Milan Milosavljevic, in Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016), pp. 2688-2696, Portoroz, Slovenia (2016). See the webpage for the list of reference papers and more information. +Description=This package contains Java implementations of three previously published stemmers for Serbian - two of them by Keselj and Sipka, one by Milosevic - and one for Croatian by Ljubesic and Pandzic. All stemmers require the input text to be in UTF-8. The stemmers accept text in both the Cyrillic and Latin scripts as input, and give the output in the Latin script. Performance comparisons between the stemmers (on the task of sentiment analysis) can be found in the paper "Reliable Baselines for Sentiment Analysis in Resource-Limited Languages: The Serbian Movie Review Dataset," Vuk Batanovic, Bosko Nikolic, Milan Milosavljevic, in Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016), pp. 2688-2696, Portoroz, Slovenia (2016). See the webpage for the list of reference papers and more information. # Package URL for obtaining the package archive (required) -PackageURL=https://github.com/vukbatanovic/SCStemmers/releases/download/v1.0.0/SCStemmers_1.0.0.zip +PackageURL=https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.0/SCStemmers_1.1.0.zip # URL for further information URL=https://github.com/vukbatanovic/SCStemmers/ diff --git a/README.md b/README.md index 89a97e0..6795308 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,7 @@ This package is a Java reimplementation of four previously published stemming al ## Text Encoding All stemmers expect the input text to be formatted in UTF-8. Their outputs are also UTF-8 encoded. -Since Serbian is a digraphic language the input texts can be in either the Cyrillic or the Latin script. The stemmer for Croatian works only with texts in the Latin script. - -For the sake of consistency all stemmers produce output in the Latin script. +Since Serbian is a digraphic language the input texts can be in either the Cyrillic or the Latin script. All stemmers produce output in the Latin script. ### Dual1 Coding System The stemmers for Serbian internally use the so-called *dual1* coding system in which only the Latin script characters without diacritical marks are allowed. @@ -38,7 +36,7 @@ public void stemFile (String fileInput, String fileOutput) ``` ### Command-line interface -The supplied [SCStemmers.jar](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.0.0/SCStemmers.jar) file makes it possible to stem the contents of textual files using the command line. Stemmers from the SCStemmers package can be invoked by the following command: +The supplied [SCStemmers.jar](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.0/SCStemmers.jar) file makes it possible to stem the contents of textual files using the command line. Stemmers from the SCStemmers package can be invoked by the following command: ``` java -jar SCStemmers.jar StemmerID InputFile OutputFile ``` @@ -52,7 +50,7 @@ where *StemmerID* is a number identifying the stemming algorithm: ### Weka Alternatively, the stemmers can be utilized as an unofficial plug-in module within Weka (Waikato Environment for Knowledge Analysis). -To do so, download the [SCStemmers Weka package](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.0.0/SCStemmers_1.0.0.zip). +To do so, download the [SCStemmers Weka package](https://github.com/vukbatanovic/SCStemmers/releases/download/v1.1.0/SCStemmers_1.1.0.zip). Open the Weka package manager (available in Weka >= 3.7) and use the "Unofficial - File/URL" option to select and install SCStemmers. After restarting Weka, the list of available stemmers (within the StringToWordVector filter) will also contain the four stemmers from this package. diff --git a/doc/allclasses-frame.html b/doc/allclasses-frame.html index 2bf394e..c7e5711 100644 --- a/doc/allclasses-frame.html +++ b/doc/allclasses-frame.html @@ -2,10 +2,10 @@ - + All Classes - + diff --git a/doc/allclasses-noframe.html b/doc/allclasses-noframe.html index d965d6d..8bd40a3 100644 --- a/doc/allclasses-noframe.html +++ b/doc/allclasses-noframe.html @@ -2,10 +2,10 @@ - + All Classes - + diff --git a/doc/constant-values.html b/doc/constant-values.html index a854801..c7d53a3 100644 --- a/doc/constant-values.html +++ b/doc/constant-values.html @@ -2,10 +2,10 @@ - + Constant Field Values - + diff --git a/doc/deprecated-list.html b/doc/deprecated-list.html index 63f914c..d6facc5 100644 --- a/doc/deprecated-list.html +++ b/doc/deprecated-list.html @@ -2,10 +2,10 @@ - + Deprecated List - + diff --git a/doc/help-doc.html b/doc/help-doc.html index 67c547b..03ac2ea 100644 --- a/doc/help-doc.html +++ b/doc/help-doc.html @@ -2,10 +2,10 @@ - + API Help - + diff --git a/doc/index-files/index-1.html b/doc/index-files/index-1.html index 2ab4c5f..d0f15f8 100644 --- a/doc/index-files/index-1.html +++ b/doc/index-files/index-1.html @@ -2,10 +2,10 @@ - + C-Index - + @@ -78,6 +78,10 @@

C

Kapitalizuje slogotvorno R u zadatoj reči, ako postoji
+
convertCyrillicToLatinCharacter(char) - Method in class weka.core.stemmers.LjubesicPandzicStemmer
+
 
+
convertCyrillicToLatinString(String) - Method in class weka.core.stemmers.LjubesicPandzicStemmer
+
 
convertToDual1Character(int, char) - Method in class weka.core.stemmers.KeseljSipkaStemmer
 
convertToDual1Character(int, char) - Method in class weka.core.stemmers.MilosevicStemmer
diff --git a/doc/index-files/index-10.html b/doc/index-files/index-10.html index 3005be4..64b51dd 100644 --- a/doc/index-files/index-10.html +++ b/doc/index-files/index-10.html @@ -2,10 +2,10 @@ - + R-Index - + diff --git a/doc/index-files/index-11.html b/doc/index-files/index-11.html index 47f4e6f..2da1a14 100644 --- a/doc/index-files/index-11.html +++ b/doc/index-files/index-11.html @@ -2,10 +2,10 @@ - + S-Index - + diff --git a/doc/index-files/index-12.html b/doc/index-files/index-12.html index 1322959..55ff912 100644 --- a/doc/index-files/index-12.html +++ b/doc/index-files/index-12.html @@ -2,10 +2,10 @@ - + T-Index - + diff --git a/doc/index-files/index-13.html b/doc/index-files/index-13.html index aa875a8..c85fba9 100644 --- a/doc/index-files/index-13.html +++ b/doc/index-files/index-13.html @@ -2,10 +2,10 @@ - + V-Index - + diff --git a/doc/index-files/index-14.html b/doc/index-files/index-14.html index 49c4dad..b2cd8b3 100644 --- a/doc/index-files/index-14.html +++ b/doc/index-files/index-14.html @@ -2,10 +2,10 @@ - + W-Index - + diff --git a/doc/index-files/index-2.html b/doc/index-files/index-2.html index 29af5a6..f716c6c 100644 --- a/doc/index-files/index-2.html +++ b/doc/index-files/index-2.html @@ -2,10 +2,10 @@ - + D-Index - + diff --git a/doc/index-files/index-3.html b/doc/index-files/index-3.html index 9adecf7..6e6c320 100644 --- a/doc/index-files/index-3.html +++ b/doc/index-files/index-3.html @@ -2,10 +2,10 @@ - + G-Index - + diff --git a/doc/index-files/index-4.html b/doc/index-files/index-4.html index faf5e14..4992673 100644 --- a/doc/index-files/index-4.html +++ b/doc/index-files/index-4.html @@ -2,10 +2,10 @@ - + H-Index - + diff --git a/doc/index-files/index-5.html b/doc/index-files/index-5.html index 4945b34..fe679dd 100644 --- a/doc/index-files/index-5.html +++ b/doc/index-files/index-5.html @@ -2,10 +2,10 @@ - + I-Index - + diff --git a/doc/index-files/index-6.html b/doc/index-files/index-6.html index b2b5b10..8d03ff6 100644 --- a/doc/index-files/index-6.html +++ b/doc/index-files/index-6.html @@ -2,10 +2,10 @@ - + K-Index - + diff --git a/doc/index-files/index-7.html b/doc/index-files/index-7.html index d960395..196e453 100644 --- a/doc/index-files/index-7.html +++ b/doc/index-files/index-7.html @@ -2,10 +2,10 @@ - + L-Index - + diff --git a/doc/index-files/index-8.html b/doc/index-files/index-8.html index d8fb241..6b9a109 100644 --- a/doc/index-files/index-8.html +++ b/doc/index-files/index-8.html @@ -2,10 +2,10 @@ - + M-Index - + diff --git a/doc/index-files/index-9.html b/doc/index-files/index-9.html index 011141f..7dcbf8b 100644 --- a/doc/index-files/index-9.html +++ b/doc/index-files/index-9.html @@ -2,10 +2,10 @@ - + P-Index - + diff --git a/doc/index.html b/doc/index.html index bc40cf2..d862252 100644 --- a/doc/index.html +++ b/doc/index.html @@ -2,15 +2,16 @@ - + Generated Documentation (Untitled) diff --git a/doc/serialized-form.html b/doc/serialized-form.html index b24193b..cb83198 100644 --- a/doc/serialized-form.html +++ b/doc/serialized-form.html @@ -2,10 +2,10 @@ - + Serialized Form - + diff --git a/doc/weka/core/stemmers/KeseljSipkaStemmer.html b/doc/weka/core/stemmers/KeseljSipkaStemmer.html index 3ba199c..d2d8787 100644 --- a/doc/weka/core/stemmers/KeseljSipkaStemmer.html +++ b/doc/weka/core/stemmers/KeseljSipkaStemmer.html @@ -2,10 +2,10 @@ - + KeseljSipkaStemmer - + diff --git a/doc/weka/core/stemmers/KeseljSipkaStemmerGreedy.html b/doc/weka/core/stemmers/KeseljSipkaStemmerGreedy.html index 2a14c8e..7290b32 100644 --- a/doc/weka/core/stemmers/KeseljSipkaStemmerGreedy.html +++ b/doc/weka/core/stemmers/KeseljSipkaStemmerGreedy.html @@ -2,10 +2,10 @@ - + KeseljSipkaStemmerGreedy - + diff --git a/doc/weka/core/stemmers/KeseljSipkaStemmerOptimal.html b/doc/weka/core/stemmers/KeseljSipkaStemmerOptimal.html index d3628d1..85cff30 100644 --- a/doc/weka/core/stemmers/KeseljSipkaStemmerOptimal.html +++ b/doc/weka/core/stemmers/KeseljSipkaStemmerOptimal.html @@ -2,10 +2,10 @@ - + KeseljSipkaStemmerOptimal - + diff --git a/doc/weka/core/stemmers/LjubesicPandzicStemmer.html b/doc/weka/core/stemmers/LjubesicPandzicStemmer.html index 1df9209..df68b7e 100644 --- a/doc/weka/core/stemmers/LjubesicPandzicStemmer.html +++ b/doc/weka/core/stemmers/LjubesicPandzicStemmer.html @@ -2,10 +2,10 @@ - + LjubesicPandzicStemmer - + @@ -19,7 +19,7 @@ catch(err) { } //--> -var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10}; +var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10}; var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; var altColor = "altColor"; var rowColor = "rowColor"; @@ -249,30 +249,38 @@

Method Summary

+private java.lang.String +convertCyrillicToLatinCharacter(char character)  + + +private java.lang.String +convertCyrillicToLatinString(java.lang.String wordOrLine)  + + private boolean hasAVowel(java.lang.String word)
Proverava da li reč sadrži samoglasnik/slogotvorno R
- + protected void initRules()
Inicijalizuje pravila za stemovanje
- + java.lang.String stemLine(java.lang.String line)
Stemuje liniju teksta
- + java.lang.String stemWord(java.lang.String word)
Ako se naiđe na neku od stop-reči, ona se preskače.
- + private java.lang.String transform(java.lang.String word)
Zamenjuje sufiks reči transformisanom varijantom tog sufiksa
@@ -515,6 +523,24 @@

hasAVowel

+ + + +
    +
  • +

    convertCyrillicToLatinString

    +
    private java.lang.String convertCyrillicToLatinString(java.lang.String wordOrLine)
    +
  • +
+ + + +
    +
  • +

    convertCyrillicToLatinCharacter

    +
    private java.lang.String convertCyrillicToLatinCharacter(char character)
    +
  • +
diff --git a/doc/weka/core/stemmers/MilosevicStemmer.html b/doc/weka/core/stemmers/MilosevicStemmer.html index 1220fdb..3b93789 100644 --- a/doc/weka/core/stemmers/MilosevicStemmer.html +++ b/doc/weka/core/stemmers/MilosevicStemmer.html @@ -2,10 +2,10 @@ - + MilosevicStemmer - + diff --git a/doc/weka/core/stemmers/SCStemmer.html b/doc/weka/core/stemmers/SCStemmer.html index 6003c03..aff6c30 100644 --- a/doc/weka/core/stemmers/SCStemmer.html +++ b/doc/weka/core/stemmers/SCStemmer.html @@ -2,10 +2,10 @@ - + SCStemmer - + diff --git a/doc/weka/core/stemmers/SerbianStemmer.html b/doc/weka/core/stemmers/SerbianStemmer.html index 6085fd0..1424c8e 100644 --- a/doc/weka/core/stemmers/SerbianStemmer.html +++ b/doc/weka/core/stemmers/SerbianStemmer.html @@ -2,10 +2,10 @@ - + SerbianStemmer - + diff --git a/doc/weka/core/stemmers/class-use/KeseljSipkaStemmer.html b/doc/weka/core/stemmers/class-use/KeseljSipkaStemmer.html index 12bf4fc..78ced70 100644 --- a/doc/weka/core/stemmers/class-use/KeseljSipkaStemmer.html +++ b/doc/weka/core/stemmers/class-use/KeseljSipkaStemmer.html @@ -2,10 +2,10 @@ - + Uses of Class weka.core.stemmers.KeseljSipkaStemmer - + diff --git a/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerGreedy.html b/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerGreedy.html index 5c2fc1d..756499d 100644 --- a/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerGreedy.html +++ b/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerGreedy.html @@ -2,10 +2,10 @@ - + Uses of Class weka.core.stemmers.KeseljSipkaStemmerGreedy - + diff --git a/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerOptimal.html b/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerOptimal.html index 656c770..5a812d2 100644 --- a/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerOptimal.html +++ b/doc/weka/core/stemmers/class-use/KeseljSipkaStemmerOptimal.html @@ -2,10 +2,10 @@ - + Uses of Class weka.core.stemmers.KeseljSipkaStemmerOptimal - + diff --git a/doc/weka/core/stemmers/class-use/LjubesicPandzicStemmer.html b/doc/weka/core/stemmers/class-use/LjubesicPandzicStemmer.html index d36526f..01e1a34 100644 --- a/doc/weka/core/stemmers/class-use/LjubesicPandzicStemmer.html +++ b/doc/weka/core/stemmers/class-use/LjubesicPandzicStemmer.html @@ -2,10 +2,10 @@ - + Uses of Class weka.core.stemmers.LjubesicPandzicStemmer - + diff --git a/doc/weka/core/stemmers/class-use/MilosevicStemmer.html b/doc/weka/core/stemmers/class-use/MilosevicStemmer.html index df71103..7200d06 100644 --- a/doc/weka/core/stemmers/class-use/MilosevicStemmer.html +++ b/doc/weka/core/stemmers/class-use/MilosevicStemmer.html @@ -2,10 +2,10 @@ - + Uses of Class weka.core.stemmers.MilosevicStemmer - + diff --git a/doc/weka/core/stemmers/class-use/SCStemmer.html b/doc/weka/core/stemmers/class-use/SCStemmer.html index 77fce5c..a04fdd3 100644 --- a/doc/weka/core/stemmers/class-use/SCStemmer.html +++ b/doc/weka/core/stemmers/class-use/SCStemmer.html @@ -2,10 +2,10 @@ - + Uses of Class weka.core.stemmers.SCStemmer - + diff --git a/doc/weka/core/stemmers/class-use/SerbianStemmer.html b/doc/weka/core/stemmers/class-use/SerbianStemmer.html index 4110cfc..a489607 100644 --- a/doc/weka/core/stemmers/class-use/SerbianStemmer.html +++ b/doc/weka/core/stemmers/class-use/SerbianStemmer.html @@ -2,10 +2,10 @@ - + Uses of Class weka.core.stemmers.SerbianStemmer - + diff --git a/doc/weka/core/stemmers/package-frame.html b/doc/weka/core/stemmers/package-frame.html index 57ec6c6..45109ab 100644 --- a/doc/weka/core/stemmers/package-frame.html +++ b/doc/weka/core/stemmers/package-frame.html @@ -2,10 +2,10 @@ - + weka.core.stemmers - + diff --git a/doc/weka/core/stemmers/package-summary.html b/doc/weka/core/stemmers/package-summary.html index 9938a37..e14beb1 100644 --- a/doc/weka/core/stemmers/package-summary.html +++ b/doc/weka/core/stemmers/package-summary.html @@ -2,10 +2,10 @@ - + weka.core.stemmers - + diff --git a/doc/weka/core/stemmers/package-tree.html b/doc/weka/core/stemmers/package-tree.html index f12999f..d8d655c 100644 --- a/doc/weka/core/stemmers/package-tree.html +++ b/doc/weka/core/stemmers/package-tree.html @@ -2,10 +2,10 @@ - + weka.core.stemmers Class Hierarchy - + diff --git a/doc/weka/core/stemmers/package-use.html b/doc/weka/core/stemmers/package-use.html index f42f3c5..1871ff5 100644 --- a/doc/weka/core/stemmers/package-use.html +++ b/doc/weka/core/stemmers/package-use.html @@ -2,10 +2,10 @@ - + Uses of Package weka.core.stemmers - +