diff --git a/README.md b/README.md
index 07cf60c..f018083 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,9 @@ twitter-korean-text는 normalization, tokenization, stemming, phrase extraction
* 한국어를 처리하는 예시입니다 ㅋㅋ -> 한국어, 처리, 예시, 처리하는 예시
+Introductory Presentation: [Google Slides](https://docs.google.com/presentation/d/10CZj8ry03oCk_Jqw879HFELzOLjJZ0EOi4KJbtRSIeU)
+
+
## Try it here
Gunja Agrawal kindly created a test API webpage for this project: [http://gunjaagrawal.com/langhack/](http://gunjaagrawal.com/langhack/)
@@ -39,7 +42,7 @@ Opensourced here: [twitter-korean-tokenizer-api](https://github.com/gunjaag/twit
## API
[scaladoc](http://twitter.github.io/twitter-korean-text/scaladocs/#com.twitter.penguin.korean.TwitterKoreanProcessor$)
-[Auto-generated Documentation](http://twitter.github.io/twitter-korean-text)
+[Maven Site Documentation](http://twitter.github.io/twitter-korean-text)
## Maven
diff --git a/pom.xml b/pom.xml
index 0cbf769..698ef68 100644
--- a/pom.xml
+++ b/pom.xml
@@ -91,6 +91,7 @@
org.slf4j
slf4j-nop
1.5.8
+ provided
org.scalatest
diff --git a/src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanTokenizer.scala b/src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanTokenizer.scala
index 877263e..0fe6679 100644
--- a/src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanTokenizer.scala
+++ b/src/main/scala/com/twitter/penguin/korean/tokenizer/KoreanTokenizer.scala
@@ -251,14 +251,20 @@ object KoreanTokenizer {
* @return sequence of KoreanTokens
*/
def tokenize(text: CharSequence): Seq[KoreanToken] = {
- chunk(text).flatMap {
- case token: KoreanToken if token.pos == Korean =>
- // Get the best parse of each chunk
- val parsed = parseKoreanChunk(token)
-
- // Collapse sequence of one-char nouns into one unknown noun: (가Noun 회Noun -> 가회Noun*)
- collapseNouns(parsed)
- case token: KoreanToken => Seq(token)
+ try {
+ chunk(text).flatMap {
+ case token: KoreanToken if token.pos == Korean =>
+ // Get the best parse of each chunk
+ val parsed = parseKoreanChunk(token)
+
+ // Collapse sequence of one-char nouns into one unknown noun: (가Noun 회Noun -> 가회Noun*)
+ collapseNouns(parsed)
+ case token: KoreanToken => Seq(token)
+ }
+ } catch {
+ case e: Exception =>
+ System.err.println(s"Error tokenizing a chunk: $text")
+ throw e
}
}
}
\ No newline at end of file