From 1abde1749987775f2d8cb32d9d1cdd73a9b6cf22 Mon Sep 17 00:00:00 2001 From: khituras Date: Tue, 20 Feb 2024 08:33:26 +0100 Subject: [PATCH] Version 2.6.2. Add exception handling, although the observed issue was eventually solved in the xml-mapper. --- jcore-jsbd-ae/pom.xml | 2 ++ .../jcore/ae/jsbd/main/SentenceAnnotator.java | 14 ++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/jcore-jsbd-ae/pom.xml b/jcore-jsbd-ae/pom.xml index fcde4c4f9..79b53ef66 100644 --- a/jcore-jsbd-ae/pom.xml +++ b/jcore-jsbd-ae/pom.xml @@ -14,6 +14,8 @@ 2.6.1 + 2.6.2-SNAPSHOT + diff --git a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java index 583db41a1..2acfe21e8 100644 --- a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java +++ b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java @@ -202,10 +202,16 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException { while (start < end && (Character.isWhitespace(documentText.getCodensedText().charAt(start)))) ++start; - // get the string between the current annotation borders and recognized sentences - String textSpan = documentText.getCodensedText().substring(start, end); - if (!StringUtils.isBlank(textSpan)) - doSegmentation(documentText, textSpan, start); + String textSpan; + try { + // get the string between the current annotation borders and recognized sentences + textSpan = documentText.getCodensedText().substring(start, end); + } catch (Exception e) { + LOGGER.error("Document text boundary error. Tried to get substring from {} to {} for text \"{}\". Text condensation is {}", start, end, documentText.getCodensedText(), cutAwayTypes != null && !cutAwayTypes.isEmpty()); + throw e; + } + if (!StringUtils.isBlank(textSpan)) + doSegmentation(documentText, textSpan, start); } } catch (ClassNotFoundException e) {