diff --git a/.gitignore b/.gitignore index 6383c12..1b6d489 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,6 @@ lib/openccg.jar output/ src/ccg2xml/ccg2xml.py src/srilmbridge/*.h +*.iml +.idea/ +target/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..478fa92 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +package: + mvn package + +tests: + mvn test diff --git a/README.md b/README.md index 0f1214c..d1f8b6c 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,13 @@ This release also includes a broad English coverage grammar from the CCGBank and * Version 1.6 or later of the Java 2 SDK (http://java.sun.com) * For ccg2xml and other tools, Python version 2.4 to 2.7 (http://www.python.org) +# Using Maven +**NOTE:** Maven takes care of the dependency download step for you, so the steps below can be ignored. + +All of the standard Maven usage rules apply. To build the `jar`, run `mvn package` or, if you use GNU Make, `make package`. +The result is `openccg.jar`, which can be found in the `src/target` directory. 
+ +Tests can be run via `mvn test`, or `make tests` # Libraries diff --git a/pom.xml b/pom.xml index cf80c5a..a16ab31 100644 --- a/pom.xml +++ b/pom.xml @@ -1,19 +1,64 @@ - - - 4.0.0 - opennlp - ccg - 0.10.0 - pom - - - src/ - - - - 1.8 - 1.8 - - - + + + 4.0.0 + acceleratedtext + openccg-parent + 0.10.4 + pom + + + src/ + + + + https://github.com/tokenmill/openccg.git + scm:git:https://github.com/tokenmill/openccg.git + scm:git:https://github.com/tokenmill/openccg.git + openccg-0.10.1 + + + + 1.8 + 1.8 + + + + src + + + src + + + classes + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.1.1 + + + -Xdoclint:none + target.* + + + + + + + + + clojars + https://repo.clojars.org/ + + + + + + clojars + https://repo.clojars.org + + + + diff --git a/src/opennlp/ccg/builders/GrammarBuilder.java b/src/opennlp/ccg/builders/GrammarBuilder.java new file mode 100644 index 0000000..2cced1b --- /dev/null +++ b/src/opennlp/ccg/builders/GrammarBuilder.java @@ -0,0 +1,49 @@ +package opennlp.ccg.builders; + +import opennlp.ccg.grammar.Grammar; +import opennlp.ccg.grammar.RuleGroup; +import opennlp.ccg.grammar.Types; +import opennlp.ccg.lexicon.Lexicon; + +public class GrammarBuilder { + public static GrammarBuilder builder(){ + return new GrammarBuilder(); + } + + private Grammar grammar; + + private GrammarBuilder(){ + this.grammar = new Grammar(); + } + + public boolean isGlobalGrammarInit(){ + return this.grammar.theGrammar != null; + } + + public GrammarBuilder withTypes(Types types){ + this.grammar.setTypes(types); + types.setGrammar(this.grammar); + return this; + } + + public GrammarBuilder withLexicon(Lexicon lexicon){ + this.grammar.setLexicon(lexicon); + lexicon.setGrammar(this.grammar); + return this; + } + + public GrammarBuilder withRules(RuleGroup rules){ + this.grammar.setRules(rules); + rules.setGrammar(this.grammar); + return this; + } + + public Grammar build(){ + // Check if we added everything + assert this.grammar.lexicon != null; + assert 
this.grammar.rules != null; + assert this.grammar.types != null; + + return this.grammar; + } +} diff --git a/src/opennlp/ccg/builders/LexiconBuilder.java b/src/opennlp/ccg/builders/LexiconBuilder.java new file mode 100644 index 0000000..fe6d0c1 --- /dev/null +++ b/src/opennlp/ccg/builders/LexiconBuilder.java @@ -0,0 +1,42 @@ +package opennlp.ccg.builders; + +import opennlp.ccg.lexicon.*; +import java.util.*; + +public class LexiconBuilder { + public static LexiconBuilder builder(){ + return new LexiconBuilder(); + } + + private Lexicon lexicon; + private List familyList = new ArrayList<>(); + private List morphList = new ArrayList<>(); + private List macroList = new ArrayList<>(); + private LexiconBuilder(){ + this.lexicon = new Lexicon(); + } + + public LexiconBuilder addFamily(Family family){ + this.familyList.add(family); + return this; + } + + public LexiconBuilder addMorph(MorphItem item){ + this.morphList.add(item); + return this; + } + + public LexiconBuilder addMacro(MacroItem item){ + this.macroList.add(item); + return this; + } + + public Lexicon ref(){ + return this.lexicon; + } + + public Lexicon build(){ + this.lexicon.init(familyList, morphList, macroList); + return this.lexicon; + } +} diff --git a/src/opennlp/ccg/builders/RulesBuilder.java b/src/opennlp/ccg/builders/RulesBuilder.java new file mode 100644 index 0000000..392f1cd --- /dev/null +++ b/src/opennlp/ccg/builders/RulesBuilder.java @@ -0,0 +1,24 @@ +package opennlp.ccg.builders; + +import opennlp.ccg.grammar.RuleGroup; +import opennlp.ccg.grammar.Rule; + +public class RulesBuilder { + public static RulesBuilder builder(){ + return new RulesBuilder(); + } + + private RuleGroup rules; + private RulesBuilder(){ + this.rules = new RuleGroup(); + } + + public RulesBuilder addRule(Rule rule){ + this.rules.addRule(rule); + return this; + } + + public RuleGroup build(){ + return this.rules; + } +} diff --git a/src/opennlp/ccg/builders/TypesBuilder.java b/src/opennlp/ccg/builders/TypesBuilder.java 
new file mode 100644 index 0000000..0b6edd8 --- /dev/null +++ b/src/opennlp/ccg/builders/TypesBuilder.java @@ -0,0 +1,39 @@ +package opennlp.ccg.builders; + +import opennlp.ccg.grammar.Types; +import org.jdom.Element; + +import java.util.ArrayList; +import java.util.List; + +public class TypesBuilder { + public static TypesBuilder builder(){ + return new TypesBuilder(); + } + + private List elementList = new ArrayList<>(); + private Types types; + private TypesBuilder(){ + this.types = new Types(); + } + + public TypesBuilder addType(String name){ + Element el = new Element("type"); + el.setAttribute("name", name); + elementList.add(el); + return this; + } + + public TypesBuilder addType(String name, String parents){ + Element el = new Element("type"); + el.setAttribute("name", name); + el.setAttribute("parents", parents); + elementList.add(el); + return this; + } + + public Types build(){ + this.types.readTypes(this.elementList); + return this.types; + } +} diff --git a/src/opennlp/ccg/grammar/Grammar.java b/src/opennlp/ccg/grammar/Grammar.java index df632b2..dcb06cc 100644 --- a/src/opennlp/ccg/grammar/Grammar.java +++ b/src/opennlp/ccg/grammar/Grammar.java @@ -52,13 +52,13 @@ public class Grammar { /** The lexicon. */ - public final Lexicon lexicon; + public Lexicon lexicon; /** The rule group. */ - public final RuleGroup rules; + public RuleGroup rules; /** The type hierarchy. */ - public final Types types; + public Types types; /** The features to include in supertags. 
*/ public final Set supertagFeatures = new HashSet(); @@ -109,7 +109,25 @@ public class Grammar { }; // set of boundary tones - private static Set boundaryTonesSet = null; + private static Set boundaryTonesSet = null; + + public Grammar(){ + theGrammar = this; + this.fromXmlTransforms = new URL[0]; + this.toXmlTransforms = new URL[0]; + } + + public void setTypes(Types types){ + this.types = types; + } + + public void setLexicon(Lexicon lexicon){ + this.lexicon = lexicon; + } + + public void setRules(RuleGroup rules){ + this.rules = rules; + } /** Loads a grammar from the given filename. */ diff --git a/src/opennlp/ccg/grammar/RuleGroup.java b/src/opennlp/ccg/grammar/RuleGroup.java index 324c884..fa87a91 100644 --- a/src/opennlp/ccg/grammar/RuleGroup.java +++ b/src/opennlp/ccg/grammar/RuleGroup.java @@ -44,7 +44,7 @@ * @author Michael White * @version $Revision: 1.32 $, $Date: 2011/06/07 05:12:01 $ */ -public class RuleGroup implements Serializable { +public class RuleGroup implements Serializable, WithGrammar { private static final long serialVersionUID = -6240266013357142289L; @@ -156,6 +156,11 @@ SupercatRuleCombo get(SupercatRuleCombo combo) { /** * Constructs an empty rule group for the given grammar. 
*/ + + public RuleGroup(){ + bapp.setRuleGroup(this); + } + public RuleGroup(Grammar grammar) { this.grammar = grammar; bapp.setRuleGroup(this); @@ -185,6 +190,10 @@ public void handleElement(Element ruleEl) { ruleScanner.parse(url); } + public void setGrammar(Grammar grammar){ + this.grammar = grammar; + } + // during deserialization, sets grammar to the current grammar private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { diff --git a/src/opennlp/ccg/grammar/Types.java b/src/opennlp/ccg/grammar/Types.java index ef630da..c19ae14 100644 --- a/src/opennlp/ccg/grammar/Types.java +++ b/src/opennlp/ccg/grammar/Types.java @@ -1,262 +1,269 @@ -/////////////////////////////////////////////////////////////////////////////// -//// Copyright (C) 2003-4 Gunes Erkan and University of Edinburgh (Michael White) -//// -//// This library is free software; you can redistribute it and/or -//// modify it under the terms of the GNU Lesser General Public -//// License as published by the Free Software Foundation; either -//// version 2.1 of the License, or (at your option) any later version. -//// -//// This library is distributed in the hope that it will be useful, -//// but WITHOUT ANY WARRANTY; without even the implied warranty of -//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//// GNU Lesser General Public License for more details. -//// -//// You should have received a copy of the GNU Lesser General Public -//// License along with this program; if not, write to the Free Software -//// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-//////////////////////////////////////////////////////////////////////////////// - -package opennlp.ccg.grammar; - -import opennlp.ccg.util.*; -import opennlp.ccg.unify.*; - -import org.jdom.*; -import org.jdom.input.*; - -import java.io.*; -import java.net.*; -import java.util.*; - -import gnu.trove.*; - -/** - * Class for constructing and holding the hierarchical simple type maps. - * - * @author Gunes Erkan - * @author Michael White - * @version $Revision: 1.13 $, $Date: 2009/12/21 03:27:18 $ - */ -public class Types { - - public final Grammar grammar; - private final HashMap nameToType = new HashMap(); - private final ArrayList indexToType = new ArrayList(); - private int maxTypeIndex = 0; - public static final String TOP_TYPE = "top"; - public static final String BOT_TYPE = "bottom"; - - /** Constructor for an empty hierarchy (with just the top type). */ - public Types(Grammar grammar) { - getSimpleType(TOP_TYPE); - this.grammar = grammar; - } - - /** - * Constructs the type hierarchy from the given URL, for - * the given grammar. - */ - @SuppressWarnings("unchecked") - public Types(URL url, Grammar grammar) throws IOException { - this.grammar = grammar; - SAXBuilder builder = new SAXBuilder(); - Document doc; - try { - doc = builder.build(url); - } - catch (JDOMException exc) { - getSimpleType(TOP_TYPE); - throw (IOException) new IOException().initCause(exc); - } - List entries = doc.getRootElement().getChildren(); - readTypes(entries); - // for debugging: print the indexToType list - //printTypes(); - } - - /** Returns the simple type with the given name, or a new one if none yet exists. 
*/ - public SimpleType getSimpleType(String typeName) { - SimpleType type = nameToType.get(typeName); - if (type == null) { - BitSet bs = new BitSet(); - bs.set(maxTypeIndex); - SimpleType newtype = new SimpleType(maxTypeIndex, typeName, bs, this); - nameToType.put(typeName, newtype); - indexToType.add(newtype); - nameToType.get(TOP_TYPE).getBitSet().set(maxTypeIndex++); - return newtype; - } - else return type; - } - - /** Returns whether there is a simple type with the given name. */ - public boolean containsSimpleType(String typeName) { - return nameToType.containsKey(typeName); - } - - /** Returns the list of types, with parents preceding children in the hierarchy. */ - public ArrayList getIndexMap() { - return indexToType; - } - - - /** Reads the rules and constructs the nameToType and indexToType maps. */ - private void readTypes(List _types) { - - GroupMap hierarchy = new GroupMap(); // map from types to all subtypes - GroupMap parents = new GroupMap(); // map from types to parents - TObjectIntHashMap depthMap = new TObjectIntHashMap(); // map from types to max depth - - // Construct the initial hierarchy of types without - // taking transitive closure. - // Also store parents. - for (int i=0; i < _types.size(); i++) { - Element typeEl = _types.get(i); - String typeName = typeEl.getAttributeValue("name"); - String _parents = typeEl.getAttributeValue("parents"); - hierarchy.put(typeName, BOT_TYPE); - if (_parents == null) { - hierarchy.put(TOP_TYPE, typeName); - parents.put(typeName, TOP_TYPE); - } - else { - String[] parentsArray = _parents.split("\\s+"); - for (int j = 0; j < parentsArray.length; j++) { - hierarchy.put(parentsArray[j], typeName); - parents.put(typeName, parentsArray[j]); - } - } - } - - // Compute depth from parents. - for (String type : parents.keySet()) { - int depth = computeDepth(type, parents, type); - depthMap.put(type, depth); - } - - // Compute ALL subtypes of each type and insert into the hierarchy. 
- for (String type : hierarchy.keySet()) { - hierarchy.putAll(type, findAllSubtypes(hierarchy, type)); - } - - // Assign a unique int to each type in breadth-first order. - // Then create the string -> SimpleType map. - createSimpleTypes(hierarchy, depthMap); - } - - /** Returns the max depth of the given type, checking for cycles. */ - private static int computeDepth(String type, GroupMap parents, String startType) { - if (type.equals(TOP_TYPE)) return 0; - int maxParentDepth = 0; - Set parentSet = parents.get(type); - if (parentSet != null) { - for (String parent : parentSet) { - if (parent.equals(startType)) { - throw new RuntimeException("Error, type hierarchy contains cycle from/to: " + startType); - } - int parentDepth = computeDepth(parent, parents, startType); - maxParentDepth = Math.max(maxParentDepth, parentDepth); - } - } - return maxParentDepth + 1; - } - - /** - * Computes the list of all sub-types of a given type (key) - * in depth-first order. - */ - private Collection findAllSubtypes(GroupMap hierarchy, String key) { - ArrayList subs = new ArrayList(); - if (hierarchy.get(key) != null) { - Stack look = new Stack(); - for (String type : hierarchy.get(key)) { - look.push(type); - } - for (; !look.empty() ; ) { - String new_sub = look.pop(); - subs.add(new_sub); - if (hierarchy.get(new_sub) != null) { - for (String type : hierarchy.get(new_sub)) { - look.push(type); - } - } - } - } - return subs; - } - - /** - * Creates the SimpleType objects and constructs the nameToType and indexToType maps. 
- */ - private void createSimpleTypes(GroupMap hierarchy, TObjectIntHashMap depthMap) { - - // find max depth - int maxDepth = 0; - int[] depths = depthMap.getValues(); - for (int i = 0; i < depths.length; i++) { - maxDepth = Math.max(maxDepth, depths[i]); - } - - // add types in order of increasing depth - ArrayList typesVisited = new ArrayList(); - typesVisited.add(TOP_TYPE); - Object[] types = depthMap.keys(); - ArrayList typesAtSameDepth = new ArrayList(); - for (int i = 1; i <= maxDepth; i++) { - typesAtSameDepth.clear(); - for (int j = 0; j < types.length; j++) { - if (depthMap.get(types[j]) == i) - typesAtSameDepth.add((String)types[j]); - } - Collections.sort(typesAtSameDepth); - typesVisited.addAll(typesAtSameDepth); - } - - // construct the maps - for (int i=0; i < typesVisited.size(); i++) { - String typeName = typesVisited.get(i); - BitSet bitset = new BitSet(); - bitset.set(i); - if (hierarchy.get(typeName) != null) { - for (String type : hierarchy.get(typeName)) { - int indexToSet = typesVisited.indexOf(type); - if (indexToSet != -1) bitset.set(indexToSet); - } - } - SimpleType st = new SimpleType(i, typeName, bitset, this); - nameToType.put(typeName, st); - indexToType.add(st); - } - maxTypeIndex = typesVisited.size(); - } - - /** - * Prints the types and their subtypes to System.out. - */ - public void printTypes() { - System.out.println("types:"); - for (int i=0; i < indexToType.size(); i++) { - SimpleType st = indexToType.get(i); - System.out.println(i + ": " + st.getName() + " subtypes: " + st.getBitSet()); - } - System.out.println(); - } - - /** Tests serialization of simple types, including resolution. 
*/ - public void debugSerialization() throws IOException, ClassNotFoundException { - // test serialization - SimpleType st = indexToType.get(1); - String filename = "tmp.ser"; - ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(filename)); - System.out.println("Writing st: " + st.getIndex() + ": " + st + " " + st.getBitSet()); - out.writeObject(st); - out.close(); - ObjectInputStream in = new ObjectInputStream(new FileInputStream(filename)); - System.out.print("Reading st2: "); - SimpleType st2 = (SimpleType) in.readObject(); - System.out.println(st2.getIndex() + ": " + st2 + " " + st2.getBitSet()); - in.close(); - // test identity (and thus readResolve) - System.out.println("st == st2?: " + (st == st2)); - } -} +/////////////////////////////////////////////////////////////////////////////// +//// Copyright (C) 2003-4 Gunes Erkan and University of Edinburgh (Michael White) +//// +//// This library is free software; you can redistribute it and/or +//// modify it under the terms of the GNU Lesser General Public +//// License as published by the Free Software Foundation; either +//// version 2.1 of the License, or (at your option) any later version. +//// +//// This library is distributed in the hope that it will be useful, +//// but WITHOUT ANY WARRANTY; without even the implied warranty of +//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//// GNU Lesser General Public License for more details. +//// +//// You should have received a copy of the GNU Lesser General Public +//// License along with this program; if not, write to the Free Software +//// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+//////////////////////////////////////////////////////////////////////////////// + +package opennlp.ccg.grammar; + +import opennlp.ccg.util.*; +import opennlp.ccg.unify.*; + +import org.jdom.*; +import org.jdom.input.*; + +import java.io.*; +import java.net.*; +import java.util.*; + +import gnu.trove.*; + +/** + * Class for constructing and holding the hierarchical simple type maps. + * + * @author Gunes Erkan + * @author Michael White + * @version $Revision: 1.13 $, $Date: 2009/12/21 03:27:18 $ + */ +public class Types implements WithGrammar { + + public Grammar grammar; + private final HashMap nameToType = new HashMap(); + private final ArrayList indexToType = new ArrayList(); + private int maxTypeIndex = 0; + public static final String TOP_TYPE = "top"; + public static final String BOT_TYPE = "bottom"; + + /** Constructor for an empty hierarchy (with just the top type). */ + public Types(){ + } + + public Types(Grammar grammar) { + getSimpleType(TOP_TYPE); + this.grammar = grammar; + } + + /** + * Constructs the type hierarchy from the given URL, for + * the given grammar. + */ + @SuppressWarnings("unchecked") + public Types(URL url, Grammar grammar) throws IOException { + this.grammar = grammar; + SAXBuilder builder = new SAXBuilder(); + Document doc; + try { + doc = builder.build(url); + } + catch (JDOMException exc) { + getSimpleType(TOP_TYPE); + throw (IOException) new IOException().initCause(exc); + } + List entries = doc.getRootElement().getChildren(); + readTypes(entries); + // for debugging: print the indexToType list + // printTypes(); + } + + public void setGrammar(Grammar grammar){ + this.grammar = grammar; + } + + /** Returns the simple type with the given name, or a new one if none yet exists. 
*/ + public SimpleType getSimpleType(String typeName) { + SimpleType type = nameToType.get(typeName); + if (type == null) { + BitSet bs = new BitSet(); + bs.set(maxTypeIndex); + SimpleType newtype = new SimpleType(maxTypeIndex, typeName, bs, this); + nameToType.put(typeName, newtype); + indexToType.add(newtype); + nameToType.get(TOP_TYPE).getBitSet().set(maxTypeIndex++); + return newtype; + } + else return type; + } + + /** Returns whether there is a simple type with the given name. */ + public boolean containsSimpleType(String typeName) { + return nameToType.containsKey(typeName); + } + + /** Returns the list of types, with parents preceding children in the hierarchy. */ + public ArrayList getIndexMap() { + return indexToType; + } + + + /** Reads the rules and constructs the nameToType and indexToType maps. */ + public void readTypes(List _types) { + + GroupMap hierarchy = new GroupMap(); // map from types to all subtypes + GroupMap parents = new GroupMap(); // map from types to parents + TObjectIntHashMap depthMap = new TObjectIntHashMap(); // map from types to max depth + + // Construct the initial hierarchy of types without + // taking transitive closure. + // Also store parents. + for (int i=0; i < _types.size(); i++) { + Element typeEl = _types.get(i); + String typeName = typeEl.getAttributeValue("name"); + String _parents = typeEl.getAttributeValue("parents"); + hierarchy.put(typeName, BOT_TYPE); + if (_parents == null) { + hierarchy.put(TOP_TYPE, typeName); + parents.put(typeName, TOP_TYPE); + } + else { + String[] parentsArray = _parents.split("\\s+"); + for (int j = 0; j < parentsArray.length; j++) { + hierarchy.put(parentsArray[j], typeName); + parents.put(typeName, parentsArray[j]); + } + } + } + + // Compute depth from parents. + for (String type : parents.keySet()) { + int depth = computeDepth(type, parents, type); + depthMap.put(type, depth); + } + + // Compute ALL subtypes of each type and insert into the hierarchy. 
+ for (String type : hierarchy.keySet()) { + hierarchy.putAll(type, findAllSubtypes(hierarchy, type)); + } + + // Assign a unique int to each type in breadth-first order. + // Then create the string -> SimpleType map. + createSimpleTypes(hierarchy, depthMap); + } + + /** Returns the max depth of the given type, checking for cycles. */ + private static int computeDepth(String type, GroupMap parents, String startType) { + if (type.equals(TOP_TYPE)) return 0; + int maxParentDepth = 0; + Set parentSet = parents.get(type); + if (parentSet != null) { + for (String parent : parentSet) { + if (parent.equals(startType)) { + throw new RuntimeException("Error, type hierarchy contains cycle from/to: " + startType); + } + int parentDepth = computeDepth(parent, parents, startType); + maxParentDepth = Math.max(maxParentDepth, parentDepth); + } + } + return maxParentDepth + 1; + } + + /** + * Computes the list of all sub-types of a given type (key) + * in depth-first order. + */ + private Collection findAllSubtypes(GroupMap hierarchy, String key) { + ArrayList subs = new ArrayList(); + if (hierarchy.get(key) != null) { + Stack look = new Stack(); + for (String type : hierarchy.get(key)) { + look.push(type); + } + for (; !look.empty() ; ) { + String new_sub = look.pop(); + subs.add(new_sub); + if (hierarchy.get(new_sub) != null) { + for (String type : hierarchy.get(new_sub)) { + look.push(type); + } + } + } + } + return subs; + } + + /** + * Creates the SimpleType objects and constructs the nameToType and indexToType maps. 
+ */ + private void createSimpleTypes(GroupMap hierarchy, TObjectIntHashMap depthMap) { + + // find max depth + int maxDepth = 0; + int[] depths = depthMap.getValues(); + for (int i = 0; i < depths.length; i++) { + maxDepth = Math.max(maxDepth, depths[i]); + } + + // add types in order of increasing depth + ArrayList typesVisited = new ArrayList(); + typesVisited.add(TOP_TYPE); + Object[] types = depthMap.keys(); + ArrayList typesAtSameDepth = new ArrayList(); + for (int i = 1; i <= maxDepth; i++) { + typesAtSameDepth.clear(); + for (int j = 0; j < types.length; j++) { + if (depthMap.get(types[j]) == i) + typesAtSameDepth.add((String)types[j]); + } + Collections.sort(typesAtSameDepth); + typesVisited.addAll(typesAtSameDepth); + } + + // construct the maps + for (int i=0; i < typesVisited.size(); i++) { + String typeName = typesVisited.get(i); + BitSet bitset = new BitSet(); + bitset.set(i); + if (hierarchy.get(typeName) != null) { + for (String type : hierarchy.get(typeName)) { + int indexToSet = typesVisited.indexOf(type); + if (indexToSet != -1) bitset.set(indexToSet); + } + } + SimpleType st = new SimpleType(i, typeName, bitset, this); + nameToType.put(typeName, st); + indexToType.add(st); + } + maxTypeIndex = typesVisited.size(); + } + + /** + * Prints the types and their subtypes to System.out. + */ + public void printTypes() { + System.out.println("types:"); + for (int i=0; i < indexToType.size(); i++) { + SimpleType st = indexToType.get(i); + System.out.println(i + ": " + st.getName() + " subtypes: " + st.getBitSet()); + } + System.out.println(); + } + + /** Tests serialization of simple types, including resolution. 
*/ + public void debugSerialization() throws IOException, ClassNotFoundException { + // test serialization + SimpleType st = indexToType.get(1); + String filename = "tmp.ser"; + ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(filename)); + System.out.println("Writing st: " + st.getIndex() + ": " + st + " " + st.getBitSet()); + out.writeObject(st); + out.close(); + ObjectInputStream in = new ObjectInputStream(new FileInputStream(filename)); + System.out.print("Reading st2: "); + SimpleType st2 = (SimpleType) in.readObject(); + System.out.println(st2.getIndex() + ": " + st2 + " " + st2.getBitSet()); + in.close(); + // test identity (and thus readResolve) + System.out.println("st == st2?: " + (st == st2)); + } +} diff --git a/src/opennlp/ccg/grammar/WithGrammar.java b/src/opennlp/ccg/grammar/WithGrammar.java new file mode 100644 index 0000000..ce34094 --- /dev/null +++ b/src/opennlp/ccg/grammar/WithGrammar.java @@ -0,0 +1,5 @@ +package opennlp.ccg.grammar; + +public interface WithGrammar { + public void setGrammar(Grammar grammar); +} diff --git a/src/opennlp/ccg/hylo/HyloHelper.java b/src/opennlp/ccg/hylo/HyloHelper.java index f57d697..877eae4 100644 --- a/src/opennlp/ccg/hylo/HyloHelper.java +++ b/src/opennlp/ccg/hylo/HyloHelper.java @@ -660,6 +660,7 @@ public int compare(LF lf1, LF lf2){ String rel1 = getRel(lf1); String rel2 = getRel(lf2); Lexicon theLexicon = Grammar.theGrammar.lexicon; + assert theLexicon != null; Integer rel1Index = theLexicon.getRelationSortIndex(rel1); Integer rel2Index = theLexicon.getRelationSortIndex(rel2); int relIndexCompare = rel1Index.compareTo(rel2Index); diff --git a/src/opennlp/ccg/lexicon/Lexicon.java b/src/opennlp/ccg/lexicon/Lexicon.java index 6cafebb..f81ba5b 100644 --- a/src/opennlp/ccg/lexicon/Lexicon.java +++ b/src/opennlp/ccg/lexicon/Lexicon.java @@ -44,7 +44,7 @@ * @author Michael White * @version $Revision: 1.78 $, $Date: 2011/10/31 02:01:06 $ */ -public class Lexicon { +public class Lexicon implements 
WithGrammar { /** Flag used to indicate a purely syntactic edge, with no associated semantics. */ public static final String NO_SEM_FLAG = "*NoSem*"; @@ -88,7 +88,7 @@ public class Lexicon { private Interner lookupCache = new Interner(true); /** The grammar that this lexicon is part of. */ - public final Grammar grammar; + public Grammar grammar; /** The tokenizer. (Defaults to DefaultTokenizer.) */ public final Tokenizer tokenizer; @@ -103,6 +103,13 @@ public class Lexicon { /************************************************************* * Constructor *************************************************************/ + public Lexicon(){ + this.tokenizer = new DefaultTokenizer(); + Grammar.theGrammar.lexicon = this; + loadLicensingFeatures(null); + loadRelationSortOrder(null); + } + public Lexicon(Grammar grammar) { this.grammar = grammar; this.tokenizer = new DefaultTokenizer(); @@ -118,20 +125,12 @@ public Lexicon(Grammar grammar, Tokenizer tokenizer) { /** Sets the supertagger (null if none). */ public void setSupertagger(SupertaggerAdapter supertagger) { _supertagger = supertagger; } - - - /** Loads the lexicon and morph files. 
*/ - public void init(URL lexiconUrl, URL morphUrl) throws IOException { - - List lexicon = null; - List morph = null; - List macroModel = null; - - // load category families (lexicon), morph forms and macros - lexicon = getLexicon(lexiconUrl); - Pair,List> morphInfo = getMorph(morphUrl); - morph = morphInfo.a; macroModel = morphInfo.b; + public void setGrammar(Grammar grammar){ + this.grammar = grammar; + } + + public void init(List lexicon, List morph, List macroModel){ // index words; also index stems to words, as default preds // store indexed coarticulation attrs too _words = new GroupMap(); @@ -148,7 +147,7 @@ public void init(URL lexiconUrl, URL morphUrl) throws IOException { Pair first = indexingWord.getSurfaceAttrValPairs().next(); _indexedCoartAttrs.add(first.a); for (Iterator> it = surfaceWord.getSurfaceAttrValPairs(); it.hasNext(); ) { - Pair p = it.next(); + Pair p = it.next(); _coartAttrs.add(p.a); } } @@ -162,16 +161,16 @@ public void init(URL lexiconUrl, URL morphUrl) throws IOException { // also index rels and coart rels to preds _relsToPreds = new GroupMap(); _coartRelsToPreds = new GroupMap(); - // and gather list of attributes used per atomic category type + // and gather list of attributes used per atomic category type _catsToAttrs = new GroupMap(); _lfAttrs = new HashSet(); // and remember family and ent, names, for checking excluded list on morph items HashSet familyAndEntryNames = new HashSet(); - + // index each family for (Family family : lexicon) { - familyAndEntryNames.add(family.getName()); + familyAndEntryNames.add(family.getName()); EntriesItem[] entries = family.getEntries(); DataItem[] data = family.getData(); @@ -185,7 +184,7 @@ public void init(URL lexiconUrl, URL morphUrl) throws IOException { for (int j=0; j < entries.length; j++) { // index EntriesItem eItem = entries[j]; - _stagToEntries.put(eItem.getSupertag()+family.getPOS(), eItem); + _stagToEntries.put(eItem.getSupertag()+family.getPOS(), eItem); if 
(eItem.getStem().length() > 0) { _stems.put(eItem.getStem()+family.getPOS(), eItem); } @@ -209,10 +208,10 @@ public void init(URL lexiconUrl, URL morphUrl) throws IOException { if (!dItem.getStem().equals(dItem.getPred())) { Collection words = (Collection) _predToWords.get(dItem.getStem()); if (words == null) { - if (!openlex) { - System.out.print("Warning: couldn't find words for pred '"); - System.out.println(dItem.getPred() + "' with stem '" + dItem.getStem() + "'"); - } + if (!openlex) { + System.out.print("Warning: couldn't find words for pred '"); + System.out.println(dItem.getPred() + "' with stem '" + dItem.getStem() + "'"); + } } else { for (Iterator it = words.iterator(); it.hasNext(); ) { @@ -223,12 +222,12 @@ public void init(URL lexiconUrl, URL morphUrl) throws IOException { } // index rels to preds - // nb: this covers relational (eg @xe) and featural (eg @epast) + // nb: this covers relational (eg @xe) and featural (eg @epast) // elementary predications List indexRels = new ArrayList(3); String familyIndexRel = family.getIndexRel(); - if (familyIndexRel.length() > 0) { - indexRels.add(familyIndexRel); + if (familyIndexRel.length() > 0) { + indexRels.add(familyIndexRel); } for (int j=0; j < entries.length; j++) { EntriesItem eItem = entries[j]; @@ -239,13 +238,13 @@ public void init(URL lexiconUrl, URL morphUrl) throws IOException { } for (Iterator it = indexRels.iterator(); it.hasNext(); ) { String indexRel = it.next(); - // nb: not indexing on entries items, b/c some stems are still defaults + // nb: not indexing on entries items, b/c some stems are still defaults for (int j=0; j < data.length; j++) { DataItem dItem = data[j]; _relsToPreds.put(indexRel, dItem.getPred()); } } - + // index coart rels (features, really) to preds String coartRel = family.getCoartRel(); if (coartRel.length() > 0) { @@ -273,32 +272,46 @@ public void init(URL lexiconUrl, URL morphUrl) throws IOException { // with morph items, check POS, macro names, excluded list for xref 
for (MorphItem morphItem : morph) { Word w = morphItem.getWord(); - if (!openlex && - !_stems.containsKey(w.getStem() + w.getPOS()) && - !_posToEntries.containsKey(w.getPOS())) + if (!openlex && + !_stems.containsKey(w.getStem() + w.getPOS()) && + !_posToEntries.containsKey(w.getPOS())) { System.err.println( - "Warning: no entries for stem '" + w.getStem() + - "' and POS '" + w.getPOS() + - "' found for word '" + w + "'" + "Warning: no entries for stem '" + w.getStem() + + "' and POS '" + w.getPOS() + + "' found for word '" + w + "'" ); } String[] macroNames = morphItem.getMacros(); for (int j=0; j < macroNames.length; j++) { if (!_macroItems.containsKey(macroNames[j])) { - System.err.println("Warning: macro " + macroNames[j] + - " not found for word '" + morphItem.getWord() + "'"); + System.err.println("Warning: macro " + macroNames[j] + + " not found for word '" + morphItem.getWord() + "'"); } } String[] excludedNames = morphItem.getExcluded(); for (int j=0; j < excludedNames.length; j++) { if (!familyAndEntryNames.contains(excludedNames[j])) { - System.err.println("Warning: excluded family or entry '" + excludedNames[j] + - "' not found for word '" + morphItem.getWord() + "'"); + System.err.println("Warning: excluded family or entry '" + excludedNames[j] + + "' not found for word '" + morphItem.getWord() + "'"); } } } } + /** Loads the lexicon and morph files. */ + public void init(URL lexiconUrl, URL morphUrl) throws IOException { + + List lexicon = null; + List morph = null; + List macroModel = null; + + // load category families (lexicon), morph forms and macros + lexicon = getLexicon(lexiconUrl); + Pair,List> morphInfo = getMorph(morphUrl); + morph = morphInfo.a; macroModel = morphInfo.b; + + init(lexicon, morph, macroModel); + } /** Expands inheritsFrom links to feature equations for those features not explicitly listed. 
*/ public void expandInheritsFrom(Category cat) { @@ -1309,4 +1322,8 @@ private void loadRelationSortOrder(Element relationSortingElt) { public GroupMap getWords() { return _words; } + + public GroupMap getMacros(){ + return _macros; + } } diff --git a/src/pom.xml b/src/pom.xml index 4d905e4..115ef64 100644 --- a/src/pom.xml +++ b/src/pom.xml @@ -1,11 +1,11 @@ - 4.0.0 - opennlp - openccg - 0.10.0 - + 4.0.0 + acceleratedtext + openccg + 0.10.4 + 1.8 1.8 @@ -13,13 +13,13 @@ - + jdom jdom 1.1 - + trove trove @@ -43,7 +43,7 @@ jopt-simple 3.1 - + junit junit @@ -67,8 +67,9 @@ - openccg - . + openccg + . + ${basedir}/../test maven-compiler-plugin @@ -127,30 +128,36 @@ - + + + org.apache.maven.plugins + maven-surefire-plugin + 3.0.0-M3 + + ${basedir}/../ + + + + org.apache.maven.surefire + surefire-junit47 + 3.0.0-M3 + + + - + + + + clojars + https://repo.clojars.org/ + + + + + + clojars + https://repo.clojars.org + + +