From 4d2d6c91fcd1eb8214ac3c9e5ec8a2c07d0d0374 Mon Sep 17 00:00:00 2001
From: Kevin Crosby
Date: Tue, 31 Jan 2017 12:05:20 -0600
Subject: [PATCH] Added log prior calculations in CachingNaiveBayesClassifier.

---
 .../classification/CachingNaiveBayesClassifier.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java
index 6fe683546d06..30a3a358358e 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/CachingNaiveBayesClassifier.java
@@ -88,11 +88,17 @@ protected List<ClassificationResult<BytesRef>> assignClassNormalizedList(String
     return asignedClassesNorm;
   }
 
+  private double calculateLogPrior(BytesRef cclass) throws IOException {
+    Term term = new Term(this.classFieldName, cclass);
+    int docsWithC = indexReader.docFreq(term);
+    return Math.log((double) docsWithC) - Math.log(docsWithClassSize);
+  }
+
   private List<ClassificationResult<BytesRef>> calculateLogLikelihood(String[] tokenizedText) throws IOException {
     // initialize the return List
     ArrayList<ClassificationResult<BytesRef>> ret = new ArrayList<>();
     for (BytesRef cclass : cclasses) {
-      ClassificationResult<BytesRef> cr = new ClassificationResult<>(cclass, 0d);
+      ClassificationResult<BytesRef> cr = new ClassificationResult<>(cclass, calculateLogPrior(cclass));
       ret.add(cr);
     }
     // for each word