/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.common.ArgUtils;
import edu.stanford.nlp.parser.common.ParserGrammar;
import edu.stanford.nlp.parser.common.ParserQuery;
import edu.stanford.nlp.parser.common.ParserUtils;
import edu.stanford.nlp.parser.lexparser.BinaryGrammar;
import edu.stanford.nlp.parser.lexparser.BinaryGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.CollinsPuncTransformer;
import edu.stanford.nlp.parser.lexparser.DependencyGrammar;
import edu.stanford.nlp.parser.lexparser.ExactGrammarCompactor;
import edu.stanford.nlp.parser.lexparser.GrammarCompactor;
import edu.stanford.nlp.parser.lexparser.LexicalizedParserQuery;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.LinearGrammarSmoother;
import edu.stanford.nlp.parser.lexparser.MLEDependencyGrammar;
import edu.stanford.nlp.parser.lexparser.MLEDependencyGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.ParentAnnotationStats;
import edu.stanford.nlp.parser.lexparser.ParseFiles;
import edu.stanford.nlp.parser.lexparser.Reranker;
import edu.stanford.nlp.parser.lexparser.RerankingParserQuery;
import edu.stanford.nlp.parser.lexparser.SplittingGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.TreeAnnotator;
import edu.stanford.nlp.parser.lexparser.TreeAnnotatorAndBinarizer;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.UnaryGrammar;
import edu.stanford.nlp.parser.lexparser.UnknownWordModel;
import edu.stanford.nlp.parser.metrics.Eval;
import edu.stanford.nlp.parser.metrics.EvaluateTreebank;
import edu.stanford.nlp.parser.metrics.ParserQueryEval;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.io.TaggedFileRecord;
import edu.stanford.nlp.trees.CompositeTreeTransformer;
import edu.stanford.nlp.trees.CompositeTreebank;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.FilteringTreebank;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.LengthTreeFilter;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeLeafLabelTransformer;
import edu.stanford.nlp.trees.TreePrint;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.ErasureUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.ReflectionLoading;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.concurrent.MulticoreWrapper;
import edu.stanford.nlp.util.concurrent.ThreadsafeProcessor;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InvalidClassException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StreamCorruptedException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.function.Function;
import java.util.zip.GZIPOutputStream;

public class LexicalizedParser
extends ParserGrammar
implements Serializable {
    /** Redwood logging channel used for all progress and diagnostic messages of this class. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(LexicalizedParser.class);
    /** Lexicon component of the parser model. */
    public Lexicon lex;
    /** Binary-rule grammar component of the parser model. */
    public BinaryGrammar bg;
    /** Unary-rule grammar component of the parser model. */
    public UnaryGrammar ug;
    /** Dependency grammar component; training only builds it when {@code op.doDep} is set, so it may be {@code null}. */
    public DependencyGrammar dg;
    /** Index of grammar states; the unary and binary grammars are constructed over it. */
    public Index<String> stateIndex;
    /** Index of words; handed to the lexicon and dependency grammar when they are built. */
    public Index<String> wordIndex;
    /** Index of tags; handed to the lexicon and dependency grammar when they are built. */
    public Index<String> tagIndex;
    /** Options held by this parser; exposed through {@link #getOp()} and updated by {@link #setOptionFlags(String...)}. */
    private Options op;
    /** Optional reranker; when non-null, {@link #parserQuery()} wraps the base query in a reranking query. */
    public Reranker reranker;
    /** Name of the system property that {@code loadModel(Options, String...)} consults for the model location. */
    private static final String SERIALIZED_PARSER_PROPERTY = "edu.stanford.nlp.SerializedLexicalizedParser";
    /** Default model location: the {@code NLP_PARSER} environment variable if set, otherwise the bundled English PCFG path. */
    public static final String DEFAULT_PARSER_LOC = System.getenv("NLP_PARSER") != null ? System.getenv("NLP_PARSER") : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    /** Serialization version of this class; changing it invalidates previously serialized parsers. */
    private static final long serialVersionUID = 2L;

    /**
     * Returns the {@link Options} instance held by this parser.
     *
     * @return this parser's options object (the live instance, not a copy)
     */
    @Override
    public Options getOp() {
        return op;
    }

    /**
     * Returns the treebank/language parameters stored in this parser's options.
     *
     * @return the {@code tlpParams} field of the parser's {@link Options}
     */
    @Override
    public TreebankLangParserParams getTLPParams() {
        final Options options = this.op;
        return options.tlpParams;
    }

    /**
     * Returns the language pack supplied by this parser's language parameters.
     *
     * @return the result of {@code getTLPParams().treebankLanguagePack()}
     */
    @Override
    public TreebankLanguagePack treebankLanguagePack() {
        TreebankLangParserParams params = getTLPParams();
        return params.treebankLanguagePack();
    }

    /**
     * Returns the default CoreNLP flags as reported by this parser's language parameters.
     *
     * @return the result of {@code getTLPParams().defaultCoreNLPFlags()}
     */
    @Override
    public String[] defaultCoreNLPFlags() {
        TreebankLangParserParams params = getTLPParams();
        return params.defaultCoreNLPFlags();
    }

    /**
     * Reports whether this parser needs pre-tagged input.
     *
     * @return always {@code false}
     */
    @Override
    public boolean requiresTags() {
        return false;
    }

    /**
     * Loads the default parser model using a fresh {@link Options} object and no extra flags.
     *
     * @return the loaded parser, as produced by {@code loadModel(Options, String...)}
     */
    public static LexicalizedParser loadModel() {
        Options defaultOptions = new Options();
        String[] noFlags = new String[0];
        return LexicalizedParser.loadModel(defaultOptions, noFlags);
    }

    /**
     * Loads a parser from the location named by the
     * {@code edu.stanford.nlp.SerializedLexicalizedParser} system property, falling back to
     * {@link #DEFAULT_PARSER_LOC} when the property is not set.
     *
     * @param op         options passed on to the loader
     * @param extraFlags option flags applied to the parser after loading
     * @return the loaded parser
     */
    public static LexicalizedParser loadModel(Options op, String ... extraFlags) {
        // The two-argument getProperty returns the default exactly when the property is absent.
        String modelLocation = System.getProperty(SERIALIZED_PARSER_PROPERTY, DEFAULT_PARSER_LOC);
        return LexicalizedParser.loadModel(modelLocation, op, extraFlags);
    }

    /**
     * Loads a parser from the given location using a fresh {@link Options} object.
     *
     * @param parserFileOrUrl location of the parser model
     * @param extraFlags      option flags applied to the parser after loading
     * @return the loaded parser
     */
    public static LexicalizedParser loadModel(String parserFileOrUrl, String ... extraFlags) {
        Options freshOptions = new Options();
        return LexicalizedParser.loadModel(parserFileOrUrl, freshOptions, extraFlags);
    }

    /**
     * Convenience overload that accepts the extra flags as a list rather than an array.
     *
     * @param parserFileOrUrl location of the parser model
     * @param extraFlags      option flags applied to the parser after loading; must not be {@code null}
     * @return the loaded parser
     */
    public static LexicalizedParser loadModel(String parserFileOrUrl, List<String> extraFlags) {
        String[] flagArray = extraFlags.toArray(new String[0]);
        return LexicalizedParser.loadModel(parserFileOrUrl, flagArray);
    }

    /**
     * Loads a parser from the given location and then applies any extra option flags to it.
     *
     * <p>{@link #getParserFromFile(String, Options)} returns {@code null} when the model cannot
     * be read. In that case this method also returns {@code null}, regardless of whether flags
     * were supplied, instead of failing with a {@link NullPointerException} only on the
     * flags-present path.
     *
     * @param parserFileOrUrl location of the parser model (serialized or text grammar)
     * @param op              options used if the model has to be read as a text grammar
     * @param extraFlags      option flags applied after loading; {@code null} or empty means none
     * @return the loaded parser, or {@code null} if loading failed
     */
    public static LexicalizedParser loadModel(String parserFileOrUrl, Options op, String ... extraFlags) {
        LexicalizedParser parser = LexicalizedParser.getParserFromFile(parserFileOrUrl, op);
        // Guard both the failed-load case and an explicit null varargs array.
        if (parser != null && extraFlags != null && extraFlags.length > 0) {
            parser.setOptionFlags(extraFlags);
        }
        return parser;
    }

    /**
     * Reads a single object from the stream and returns it as a {@code LexicalizedParser}.
     *
     * <p>The stream is not closed by this method.
     *
     * @param ois stream positioned at a serialized parser
     * @return the deserialized parser
     * @throws ClassCastException if the object read is {@code null} or not a {@code LexicalizedParser}
     * @throws RuntimeIOException if reading from the stream fails
     * @throws RuntimeException   if a class needed for deserialization cannot be found
     */
    public static LexicalizedParser loadModel(ObjectInputStream ois) {
        try {
            Object o = ois.readObject();
            if (o instanceof LexicalizedParser) {
                return (LexicalizedParser)o;
            }
            // readObject() may legitimately return null; report that as a type mismatch
            // rather than tripping a NullPointerException while building the message.
            String actual = o == null ? "null" : String.valueOf(o.getClass());
            throw new ClassCastException("Wanted LexicalizedParser, got " + actual);
        }
        catch (IOException e) {
            throw new RuntimeIOException(e);
        }
        catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates a new parser object that refers to the same lexicon, grammars, indices and options
     * as the given parser. The components themselves are shared, not duplicated, and the
     * {@code reranker} field is not carried over.
     *
     * @param parser the parser whose components are reused
     * @return a new {@code LexicalizedParser} wrapping the same components
     */
    public static LexicalizedParser copyLexicalizedParser(LexicalizedParser parser) {
        return new LexicalizedParser(
                parser.lex,
                parser.bg,
                parser.ug,
                parser.dg,
                parser.stateIndex,
                parser.wordIndex,
                parser.tagIndex,
                parser.op);
    }

    /**
     * Assembles a parser from already-built components. The arguments are stored as given.
     *
     * @param lex        lexicon
     * @param bg         binary grammar
     * @param ug         unary grammar
     * @param dg         dependency grammar (may be {@code null})
     * @param stateIndex index of grammar states
     * @param wordIndex  index of words
     * @param tagIndex   index of tags
     * @param op         parser options
     */
    public LexicalizedParser(Lexicon lex, BinaryGrammar bg, UnaryGrammar ug, DependencyGrammar dg, Index<String> stateIndex, Index<String> wordIndex, Index<String> tagIndex, Options op) {
        // Configuration first, then indices, then the model components.
        this.op = op;
        this.stateIndex = stateIndex;
        this.wordIndex = wordIndex;
        this.tagIndex = tagIndex;
        this.lex = lex;
        this.ug = ug;
        this.bg = bg;
        this.dg = dg;
    }

    /**
     * Trains a parser from a single treebank, with no secondary treebank, no tuning treebank and
     * no extra tagged words.
     *
     * @param trainTreebank training trees
     * @param compactor     grammar compactor to apply, or {@code null} for none
     * @param op            training options
     * @return the trained parser
     */
    public static LexicalizedParser trainFromTreebank(Treebank trainTreebank, GrammarCompactor compactor, Options op) {
        final Treebank noSecondaryTreebank = null;
        final Treebank noTuneTreebank = null;
        final List<List<TaggedWord>> noExtraTaggedWords = null;
        final double secondaryWeight = 1.0;
        return LexicalizedParser.getParserFromTreebank(trainTreebank, noSecondaryTreebank, secondaryWeight, compactor, op, noTuneTreebank, noExtraTaggedWords);
    }

    /**
     * Loads a treebank from disk and trains a parser from it.
     *
     * @param treebankPath path of the treebank to load
     * @param filt         file filter selecting which files to read, or {@code null} for all
     * @param op           training options
     * @return the trained parser
     */
    public static LexicalizedParser trainFromTreebank(String treebankPath, FileFilter filt, Options op) {
        Treebank loaded = LexicalizedParser.makeTreebank(treebankPath, op, filt);
        return LexicalizedParser.trainFromTreebank(loaded, op);
    }

    /**
     * Trains a parser from a treebank without applying a grammar compactor.
     *
     * @param trainTreebank training trees
     * @param op            training options
     * @return the trained parser
     */
    public static LexicalizedParser trainFromTreebank(Treebank trainTreebank, Options op) {
        final GrammarCompactor noCompactor = null;
        return LexicalizedParser.trainFromTreebank(trainTreebank, noCompactor, op);
    }

    /**
     * Parses a sentence given as plain strings by wrapping each string in a {@link Word}
     * and delegating to {@link #parse(List)}.
     *
     * @param lst the tokens of the sentence, in order
     * @return the tree returned by {@link #parse(List)}
     */
    public Tree parseStrings(List<String> lst) {
        List<Word> sentence = new ArrayList<Word>();
        Iterator<String> tokens = lst.iterator();
        while (tokens.hasNext()) {
            sentence.add(new Word(tokens.next()));
        }
        return this.parse(sentence);
    }

    /**
     * Parses a sentence and returns the best tree, never throwing for parse failures.
     *
     * <p>If the query reports success, the best parse is returned with its score set to the PCFG
     * score taken modulo {@code -10000.0}. If the query reports failure, or any exception is
     * raised while parsing, a fallback tree built by {@code ParserUtils.xTree} is returned
     * instead; exceptions are logged and their stack trace printed.
     *
     * @param lst the sentence to parse
     * @return the best parse, or the fallback tree when parsing did not succeed
     */
    @Override
    public Tree parse(List<? extends HasWord> lst) {
        try {
            ParserQuery query = this.parserQuery();
            boolean parsed = query.parse(lst);
            if (parsed) {
                Tree best = query.getBestParse();
                // NOTE(review): the modulo presumably strips a large fixed penalty from the
                // score - confirm against the scoring code.
                double score = query.getPCFGScore() % -10000.0;
                best.setScore(score);
                return best;
            }
        }
        catch (Exception e) {
            log.info("Following exception caught during parsing:");
            e.printStackTrace();
            log.info("Recovering using fall through strategy: will construct an (X ...) tree.");
        }
        // Reached on an unsuccessful parse or after a caught exception.
        return ParserUtils.xTree(lst);
    }

    /**
     * Parses each sentence in turn with {@link #parse(List)}.
     *
     * @param sentences the sentences to parse
     * @return one tree per input sentence, in input order
     */
    public List<Tree> parseMultiple(List<? extends List<? extends HasWord>> sentences) {
        List<Tree> results = new ArrayList<Tree>();
        for (List<? extends HasWord> sentence : sentences) {
            Tree parsed = this.parse(sentence);
            results.add(parsed);
        }
        return results;
    }

    /**
     * Parses the sentences through a {@link MulticoreWrapper} configured with {@code nthreads}.
     *
     * <p>Each sentence is handed to the wrapper, and any results already available are collected
     * immediately. After all sentences are submitted the wrapper is joined and the remaining
     * results are collected.
     *
     * @param sentences the sentences to parse
     * @param nthreads  thread count passed to the wrapper
     * @return the trees in the order the wrapper yields them
     */
    public List<Tree> parseMultiple(List<? extends List<? extends HasWord>> sentences, int nthreads) {
        ThreadsafeProcessor<List<? extends HasWord>, Tree> processor = new ThreadsafeProcessor<List<? extends HasWord>, Tree>(){

            @Override
            public Tree process(List<? extends HasWord> words) {
                return LexicalizedParser.this.parse(words);
            }

            @Override
            public ThreadsafeProcessor<List<? extends HasWord>, Tree> newInstance() {
                // The same processor object is handed out for every request.
                return this;
            }
        };
        MulticoreWrapper<List<? extends HasWord>, Tree> pool = new MulticoreWrapper<List<? extends HasWord>, Tree>(nthreads, processor);
        List<Tree> results = new ArrayList<Tree>();
        for (List<? extends HasWord> sentence : sentences) {
            pool.put(sentence);
            // Collect whatever has finished so far.
            while (pool.peek()) {
                results.add(pool.poll());
            }
        }
        pool.join();
        // Collect everything that completed during the join.
        while (pool.peek()) {
            results.add(pool.poll());
        }
        return results;
    }

    /**
     * Builds a {@link TreePrint} from this parser's test options and language parameters.
     *
     * @return the result of {@code op.testOptions.treePrint(op.tlpParams)}
     */
    public TreePrint getTreePrint() {
        final Options options = this.op;
        return options.testOptions.treePrint(options.tlpParams);
    }

    /**
     * Parses a sentence and returns the best parse, without any fallback tree.
     *
     * @param sentence the sentence to parse
     * @return the best parse, or {@code null} if the query reports that parsing failed
     */
    @Override
    public Tree parseTree(List<? extends HasWord> sentence) {
        ParserQuery query = this.parserQuery();
        boolean parsed = query.parse(sentence);
        return parsed ? query.getBestParse() : null;
    }

    /**
     * Returns the evaluations supplied by the reranker, if one is attached.
     *
     * @return the reranker's evals, or an empty list when there is no reranker
     */
    @Override
    public List<Eval> getExtraEvals() {
        if (this.reranker == null) {
            return Collections.emptyList();
        }
        return this.reranker.getEvals();
    }

    /**
     * Returns the parser-query evaluations for this parser; there are none.
     *
     * @return an empty list
     */
    @Override
    public List<ParserQueryEval> getParserQueryEvals() {
        List<ParserQueryEval> none = Collections.emptyList();
        return none;
    }

    /**
     * Creates a new query object for parsing one sentence.
     *
     * @return a plain {@link LexicalizedParserQuery} when no reranker is set; otherwise a
     *         {@link RerankingParserQuery} wrapping one together with the reranker
     */
    @Override
    public ParserQuery parserQuery() {
        LexicalizedParserQuery baseQuery = new LexicalizedParserQuery(this);
        if (this.reranker != null) {
            return new RerankingParserQuery(this.op, baseQuery, this.reranker);
        }
        return baseQuery;
    }

    /**
     * Creates a new {@link LexicalizedParserQuery} for this parser, ignoring any reranker.
     *
     * @return a fresh query bound to this parser
     */
    public LexicalizedParserQuery lexicalizedParserQuery() {
        LexicalizedParserQuery query = new LexicalizedParserQuery(this);
        return query;
    }

    /**
     * Loads a parser from a file or URL, trying the serialized format first and the text
     * grammar format second.
     *
     * @param parserFileOrUrl location of the parser model
     * @param op              options used when reading the text grammar format
     * @return the loaded parser; {@code null} if the text loader was tried and returned {@code null}
     */
    public static LexicalizedParser getParserFromFile(String parserFileOrUrl, Options op) {
        LexicalizedParser fromSerialized = LexicalizedParser.getParserFromSerializedFile(parserFileOrUrl);
        if (fromSerialized != null) {
            return fromSerialized;
        }
        // Serialized loading yielded nothing; fall back to the text grammar reader.
        return LexicalizedParser.getParserFromTextFile(parserFileOrUrl, op);
    }

    /**
     * Loads the primary training treebank from disk, logging progress.
     *
     * @param treebankPath path to load trees from
     * @param op           options whose language parameters create the {@link DiskTreebank}
     * @param filt         file filter to apply, or {@code null} to load the whole path
     * @return the loaded treebank
     */
    private static Treebank makeTreebank(String treebankPath, Options op, FileFilter filt) {
        log.info("Training a parser from treebank dir: " + treebankPath);
        Timing tim = new Timing();
        DiskTreebank treebank = op.tlpParams.diskTreebank();
        log.info("Reading trees...");
        if (filt != null) {
            treebank.loadPath(treebankPath, filt);
        } else {
            treebank.loadPath(treebankPath);
        }
        int treeCount = treebank.size();
        Timing.tick("done [read " + treeCount + " trees].");
        return treebank;
    }

    /**
     * Loads the secondary training treebank from disk, logging progress.
     *
     * @param treebankPath path to load trees from
     * @param op           options whose language parameters create the {@link DiskTreebank}
     * @param filt         file filter to apply, or {@code null} to load the whole path
     * @return the loaded treebank
     */
    private static DiskTreebank makeSecondaryTreebank(String treebankPath, Options op, FileFilter filt) {
        log.info("Additionally training using secondary disk treebank: " + treebankPath + ' ' + filt);
        Timing tim = new Timing();
        DiskTreebank treebank = op.tlpParams.diskTreebank();
        log.info("Reading trees...");
        if (filt != null) {
            treebank.loadPath(treebankPath, filt);
        } else {
            treebank.loadPath(treebankPath);
        }
        int treeCount = treebank.size();
        Timing.tick("done [read " + treeCount + " trees].");
        return treebank;
    }

    /**
     * Returns the lexicon component of this parser.
     *
     * @return the value of the {@code lex} field
     */
    public Lexicon getLexicon() {
        return lex;
    }

    /**
     * Writes this parser to the given file using Java serialization.
     *
     * <p>The output stream is managed with try-with-resources, so it is closed even if
     * serialization fails part-way through.
     *
     * @param filename destination passed to {@code IOUtils.writeStreamFromString}
     * @throws RuntimeIOException if opening, writing or closing the stream fails
     */
    public void saveParserToSerialized(String filename) {
        log.info("Writing parser in serialized format to file " + filename + ' ');
        try (ObjectOutputStream out = IOUtils.writeStreamFromString(filename)) {
            out.writeObject(this);
        }
        catch (IOException ioe) {
            throw new RuntimeIOException(ioe);
        }
        // Only reached when the stream was written and closed without error.
        log.info("done.");
    }

    /**
     * Writes this parser to a text grammar file as a sequence of {@code BEGIN <SECTION>} blocks:
     * OPTIONS, STATE_INDEX, WORD_INDEX, TAG_INDEX, LEXICON (followed by the unknown word model
     * class name or {@code null}), UNARY_GRAMMAR, BINARY_GRAMMAR and DEPENDENCY_GRAMMAR.
     * Output is gzip-compressed when {@code filename} ends with {@code .gz}.
     *
     * <p>The streams are closed even when writing fails. Because {@link PrintWriter} records
     * I/O errors instead of throwing them, its error flag is checked after closing so that a
     * truncated file is reported rather than silently accepted.
     *
     * @param filename destination file
     * @throws UnsupportedOperationException if this parser has a reranker
     * @throws RuntimeIOException            if the file cannot be opened or written
     */
    public void saveParserToTextFile(String filename) {
        if (this.reranker != null) {
            throw new UnsupportedOperationException("Sorry, but parsers with rerankers cannot be saved to text file");
        }
        try {
            log.info("Writing parser in text grammar format to file " + filename);
            // The file stream is its own resource so it is released even if wrapping it
            // (for example the gzip header write) fails.
            try (FileOutputStream fileOut = new FileOutputStream(filename);
                 PrintWriter out = new PrintWriter(filename.endsWith(".gz") ? new BufferedOutputStream(new GZIPOutputStream(fileOut)) : new BufferedOutputStream(fileOut))) {
                String prefix = "BEGIN ";
                out.println(prefix + "OPTIONS");
                this.op.writeData(out);
                out.println();
                log.info(".");
                out.println(prefix + "STATE_INDEX");
                this.stateIndex.saveToWriter(out);
                out.println();
                log.info(".");
                out.println(prefix + "WORD_INDEX");
                this.wordIndex.saveToWriter(out);
                out.println();
                log.info(".");
                out.println(prefix + "TAG_INDEX");
                this.tagIndex.saveToWriter(out);
                out.println();
                log.info(".");
                String uwmClazz = this.lex.getUnknownWordModel() == null ? "null" : this.lex.getUnknownWordModel().getClass().getCanonicalName();
                out.println(prefix + "LEXICON " + uwmClazz);
                this.lex.writeData(out);
                out.println();
                log.info(".");
                out.println(prefix + "UNARY_GRAMMAR");
                this.ug.writeData(out);
                out.println();
                log.info(".");
                out.println(prefix + "BINARY_GRAMMAR");
                this.bg.writeData(out);
                out.println();
                log.info(".");
                out.println(prefix + "DEPENDENCY_GRAMMAR");
                // The section header is always written; its body only when a dependency grammar exists.
                if (this.dg != null) {
                    this.dg.writeData(out);
                }
                out.println();
                log.info(".");
                out.flush();
                // Close explicitly so that errors raised while flushing or finishing the stream
                // are recorded before the error flag is inspected; the implicit close that
                // follows is a no-op.
                out.close();
                if (out.checkError()) {
                    throw new IOException("Error while writing parser data to " + filename);
                }
            }
            log.info("done.");
        }
        catch (IOException e) {
            log.info("Trouble saving parser data to ASCII format.");
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Verifies that a line read from a text grammar file opens a new section.
     *
     * @param file name of the file being read, used only in error messages
     * @param line the line just read, or {@code null} at end of file
     * @throws RuntimeException if {@code line} is {@code null} or does not start with {@code BEGIN}
     */
    private static void confirmBeginBlock(String file, String line) {
        if (line == null) {
            throw new RuntimeException(file + ": expecting BEGIN block; got end of file.");
        }
        boolean opensBlock = line.startsWith("BEGIN");
        if (opensBlock) {
            return;
        }
        throw new RuntimeException(file + ": expecting BEGIN block; got " + line);
    }

    /**
     * Reads a parser from a text grammar file.
     *
     * <p>The file is consumed strictly in order. Each section must be introduced by a line
     * starting with {@code BEGIN}: options, state index, word index, tag index, lexicon (whose
     * header line carries the unknown word model class name as its third space-separated token,
     * or the literal {@code null}), unary grammar, binary grammar and dependency grammar.
     *
     * @param textFileOrUrl location of the text grammar
     * @param op            options object; it is populated from the file's OPTIONS section
     * @return the parser read from the file, or {@code null} if an {@link IOException} occurred
     *         (its stack trace is printed)
     * @throws RuntimeException if a section header is missing
     */
    protected static LexicalizedParser getParserFromTextFile(String textFileOrUrl, Options op) {
        try (BufferedReader reader = IOUtils.readerFromString(textFileOrUrl)) {
            Timing timer = new Timing();

            // OPTIONS
            String header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            op.readData(reader);

            // STATE_INDEX
            header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            Index<String> states = HashIndex.loadFromReader(reader);

            // WORD_INDEX
            header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            Index<String> words = HashIndex.loadFromReader(reader);

            // TAG_INDEX
            header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            Index<String> tags = HashIndex.loadFromReader(reader);

            // LEXICON <unknown word model class | null>
            header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            Lexicon lexicon = op.tlpParams.lex(op, words, tags);
            String[] headerTokens = header.split(" +");
            String unknownWordModelClass = headerTokens[2];
            if (!"null".equals(unknownWordModelClass)) {
                UnknownWordModel unknownWordModel = (UnknownWordModel)ReflectionLoading.loadByReflection(unknownWordModelClass, op, lexicon, words, tags);
                lexicon.setUnknownWordModel(unknownWordModel);
            }
            lexicon.readData(reader);

            // UNARY_GRAMMAR
            header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            UnaryGrammar unary = new UnaryGrammar(states);
            unary.readData(reader);

            // BINARY_GRAMMAR
            header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            BinaryGrammar binary = new BinaryGrammar(states);
            binary.readData(reader);

            // DEPENDENCY_GRAMMAR
            header = reader.readLine();
            LexicalizedParser.confirmBeginBlock(textFileOrUrl, header);
            MLEDependencyGrammar dependency = new MLEDependencyGrammar(op.tlpParams, op.directional, op.distance, op.coarseDistance, op.trainOptions.basicCategoryTagsInDependencyGrammar, op, words, tags);
            dependency.readData(reader);

            log.info("Loading parser from text file " + textFileOrUrl + " ... done [" + timer.toSecondsString() + " sec].");
            return new LexicalizedParser(lexicon, binary, unary, dependency, states, words, tags, op);
        }
        catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Loads a parser from a Java-serialized file or URL.
     *
     * <p>The input stream is managed with try-with-resources so it is closed even when
     * deserialization fails.
     *
     * @param serializedFileOrUrl location of the serialized parser
     * @return the parser, or {@code null} when the stream is not a valid serialization stream
     *         (which may simply mean the file is a text grammar) or another error was logged
     * @throws RuntimeException if the file is not found or contains an invalid class
     */
    public static LexicalizedParser getParserFromSerializedFile(String serializedFileOrUrl) {
        try {
            Timing tim = new Timing();
            LexicalizedParser pd;
            try (ObjectInputStream in = IOUtils.readStreamFromString(serializedFileOrUrl)) {
                pd = LexicalizedParser.loadModel(in);
            }
            log.info("Loading parser from serialized file " + serializedFileOrUrl + " ... done [" + tim.toSecondsString() + " sec].");
            return pd;
        }
        catch (InvalidClassException ice) {
            throw new RuntimeException("Invalid class in file: " + serializedFileOrUrl, ice);
        }
        catch (FileNotFoundException fnfe) {
            throw new RuntimeException("File not found: " + serializedFileOrUrl, fnfe);
        }
        catch (StreamCorruptedException sce) {
            // Not fatal: callers fall back to reading the file as a text grammar.
            log.info("Attempting to load " + serializedFileOrUrl + " as a serialized grammar caused error below, but this may just be because it's a text grammar!");
            log.info(sce);
        }
        catch (Exception e) {
            log.error(e);
        }
        return null;
    }

    /**
     * Displays the general options, then either the training or the testing options, and
     * finally the language parameters.
     *
     * @param train {@code true} to display training options, {@code false} for testing options
     * @param op    the options to display
     */
    private static void printOptions(boolean train, Options op) {
        op.display();
        if (!train) {
            op.testOptions.display();
        } else {
            op.trainOptions.display();
        }
        op.tlpParams.display();
    }

    /**
     * Creates the annotator/binarizer used on training trees.
     *
     * <p>When {@code op.trainOptions.leftToRight} is set, the binarizer is built with the
     * language's head finder plus a {@link LeftHeadFinder}; otherwise the constructor without
     * explicit head finders is used.
     *
     * @param op training options
     * @return a new {@link TreeAnnotatorAndBinarizer}
     */
    public static TreeAnnotatorAndBinarizer buildTrainBinarizer(Options op) {
        TreebankLangParserParams params = op.tlpParams;
        if (op.trainOptions.leftToRight) {
            return new TreeAnnotatorAndBinarizer(params.headFinder(), new LeftHeadFinder(), params, op.forceCNF, !op.trainOptions.outsideFactor(), !op.trainOptions.predictSplits, op);
        }
        return new TreeAnnotatorAndBinarizer(params, op.forceCNF, !op.trainOptions.outsideFactor(), !op.trainOptions.predictSplits, op);
    }

    /**
     * Builds the training tree transformer around a freshly created binarizer.
     *
     * @param op training options
     * @return the composite transformer applied to training trees
     */
    public static CompositeTreeTransformer buildTrainTransformer(Options op) {
        return LexicalizedParser.buildTrainTransformer(op, LexicalizedParser.buildTrainBinarizer(op));
    }

    /**
     * Assembles the chain of transformers applied to training trees. In order, it contains the
     * optional pre-transformer, the optional Collins punctuation transformer, the given
     * binarizer, and the optional word-function leaf transformer.
     *
     * @param op        training options selecting the optional stages
     * @param binarizer the annotator/binarizer to include in the chain
     * @return the composite transformer
     */
    public static CompositeTreeTransformer buildTrainTransformer(Options op, TreeAnnotatorAndBinarizer binarizer) {
        TreebankLanguagePack languagePack = op.tlpParams.treebankLanguagePack();
        CompositeTreeTransformer chain = new CompositeTreeTransformer();

        // Stage 1: user-supplied pre-transformer, if any.
        if (op.trainOptions.preTransformer != null) {
            chain.addTransformer(op.trainOptions.preTransformer);
        }
        // Stage 2: Collins-style punctuation handling, if requested.
        if (op.trainOptions.collinsPunc) {
            chain.addTransformer(new CollinsPuncTransformer(languagePack));
        }
        // Stage 3: annotation and binarization (always present).
        chain.addTransformer(binarizer);
        // Stage 4: rewrite leaf labels with the word function, if configured.
        if (op.wordFunction != null) {
            chain.addTransformer(new TreeLeafLabelTransformer(op.wordFunction));
        }
        return chain;
    }

    /**
     * Annotates and binarizes the training, secondary and tuning treebanks with the training
     * transformer, optionally computing selective split categories first.
     *
     * <p>Side effects on {@code op.trainOptions}: {@code splitters} is replaced when
     * {@code selectiveSplit} is set, and {@code postSplitters} when {@code selectivePostSplit}
     * is set. {@code printTreeTransformations} is temporarily zeroed during the
     * {@code hSelSplit} pass and then restored.
     *
     * @param trainTreebank     primary training trees
     * @param secondaryTreebank secondary training trees, or {@code null}
     * @param tuneTreebank      tuning trees, or {@code null}
     * @param op                options controlling the transformation
     * @return the transformed (train, secondary, tune) treebanks; the latter two are
     *         {@code null} when the corresponding input was {@code null}
     */
    public static Triple<Treebank, Treebank, Treebank> getAnnotatedBinaryTreebankFromTreebank(Treebank trainTreebank, Treebank secondaryTreebank, Treebank tuneTreebank, Options op) {
        TreebankLangParserParams tlpParams = op.tlpParams;
        TreebankLanguagePack tlp = tlpParams.treebankLanguagePack();
        // In verbose mode, print a textual summary of the input treebanks to stderr.
        if (op.testOptions.verbose) {
            PrintWriter pwErr = tlpParams.pw(System.err);
            pwErr.print("Training ");
            pwErr.println(trainTreebank.textualSummary(tlp));
            if (secondaryTreebank != null) {
                pwErr.print("Secondary training ");
                pwErr.println(secondaryTreebank.textualSummary(tlp));
            }
        }
        log.info("Binarizing trees...");
        TreeAnnotatorAndBinarizer binarizer = LexicalizedParser.buildTrainBinarizer(op);
        CompositeTreeTransformer trainTransformer = LexicalizedParser.buildTrainTransformer(op, binarizer);
        // Statistics below are gathered over the primary and secondary treebanks together.
        Treebank wholeTreebank = secondaryTreebank == null ? trainTreebank : new CompositeTreebank(trainTreebank, secondaryTreebank);
        if (op.trainOptions.selectiveSplit) {
            // Compute the parent-annotation split categories, then drop those listed in deleteSplitters.
            op.trainOptions.splitters = ParentAnnotationStats.getSplitCategories(wholeTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, tlp);
            LexicalizedParser.removeDeleteSplittersFromSplitters(tlp, op);
            if (op.testOptions.verbose) {
                ArrayList<String> list = new ArrayList<String>(op.trainOptions.splitters);
                Collections.sort(list);
                log.info("Parent split categories: " + list);
            }
        }
        if (op.trainOptions.selectivePostSplit) {
            // Post-split categories are computed on trees that have been run through a TreeAnnotator.
            TreeAnnotator myTransformer = new TreeAnnotator(tlpParams.headFinder(), tlpParams, op);
            wholeTreebank = wholeTreebank.transform(myTransformer);
            op.trainOptions.postSplitters = ParentAnnotationStats.getSplitCategories(wholeTreebank, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, tlp);
            if (op.testOptions.verbose) {
                log.info("Parent post annotation split categories: " + op.trainOptions.postSplitters);
            }
        }
        if (op.trainOptions.hSelSplit) {
            // Preliminary pass over every tree with selective splitting disabled and tree
            // printing suppressed; the transformed trees are discarded.
            // NOTE(review): presumably this lets the binarizer collect the counts it needs
            // before selective splitting is switched on - confirm in TreeAnnotatorAndBinarizer.
            int ptt = op.trainOptions.printTreeTransformations;
            op.trainOptions.printTreeTransformations = 0;
            binarizer.setDoSelectiveSplit(false);
            for (Tree tree : wholeTreebank) {
                trainTransformer.transformTree(tree);
            }
            binarizer.setDoSelectiveSplit(true);
            op.trainOptions.printTreeTransformations = ptt;
        }
        // The real transformation of the training data.
        trainTreebank = trainTreebank.transform(trainTransformer);
        if (secondaryTreebank != null) {
            secondaryTreebank = secondaryTreebank.transform(trainTransformer);
        }
        if (op.trainOptions.printAnnotatedStateCounts) {
            binarizer.printStateCounts();
        }
        if (op.trainOptions.printAnnotatedRuleCounts) {
            binarizer.printRuleCounts();
        }
        // The tuning treebank is transformed after the optional count printing above.
        if (tuneTreebank != null) {
            tuneTreebank = tuneTreebank.transform(trainTransformer);
        }
        Timing.tick("done.");
        if (op.testOptions.verbose) {
            binarizer.dumpStats();
        }
        return new Triple<Treebank, Treebank, Treebank>(trainTreebank, secondaryTreebank, tuneTreebank);
    }

    /**
     * Removes from {@code op.trainOptions.splitters} every entry matched by an entry of
     * {@code op.trainOptions.deleteSplitters}. Does nothing when {@code deleteSplitters} is
     * {@code null}.
     *
     * <p>A splitter matches a delete entry when the two strings are equal. When the delete
     * entry is itself a basic category, a splitter also matches if its basic category equals
     * the delete entry's. In verbose mode the removed splitters are logged.
     *
     * @param tlp language pack used to compute basic categories
     * @param op  options holding the splitter collections, modified in place
     */
    private static void removeDeleteSplittersFromSplitters(TreebankLanguagePack tlp, Options op) {
        if (op.trainOptions.deleteSplitters == null) {
            return;
        }
        List<String> removed = new ArrayList<String>();
        for (String target : op.trainOptions.deleteSplitters) {
            String targetBase = tlp.basicCategory(target);
            // Only match on basic category when the delete entry carries no annotation itself.
            boolean matchOnBase = target.equals(targetBase);
            for (Iterator<String> candidates = op.trainOptions.splitters.iterator(); candidates.hasNext(); ) {
                String candidate = candidates.next();
                String candidateBase = tlp.basicCategory(candidate);
                boolean matches = (matchOnBase && candidateBase.equals(targetBase)) || candidate.equals(target);
                if (matches) {
                    candidates.remove();
                    removed.add(candidate);
                }
            }
        }
        if (op.testOptions.verbose) {
            log.info("Removed from vertical splitters: " + removed);
        }
    }

    /**
     * Trains a complete parser from treebank data.
     *
     * <p>Steps, in order: display the options; annotate and binarize the treebanks; wrap them
     * in length filters; extract the PCFG and lexicon (via a {@link SplittingGrammarExtractor}
     * when {@code op.trainOptions.predictSplits} is set, otherwise via a
     * {@link BinaryGrammarExtractor} plus explicit lexicon training); optionally smooth and
     * compact the grammar; compile the grammars; optionally extract and tune a dependency
     * grammar; optionally write the binarized trees to {@code op.trainOptions.trainTreeFile}.
     *
     * @param trainTreebank          primary training trees
     * @param secondaryTrainTreebank secondary training trees, or {@code null}
     * @param weight                 weight given to the secondary treebank
     * @param compactor              grammar compactor, or {@code null} for none
     * @param op                     options controlling training
     * @param tuneTreebank           trees for tuning the dependency grammar, or {@code null}
     * @param extraTaggedWords       additional tagged sentences for lexicon training, or {@code null}
     * @return the trained parser
     */
    public static LexicalizedParser getParserFromTreebank(Treebank trainTreebank, Treebank secondaryTrainTreebank, double weight, GrammarCompactor compactor, Options op, Treebank tuneTreebank, List<List<TaggedWord>> extraTaggedWords) {
        HashIndex<String> tagIndex;
        HashIndex<String> wordIndex;
        Index<String> stateIndex;
        Lexicon lex;
        // Raw Pair: its first element is later used as the UnaryGrammar, its second as the BinaryGrammar.
        Pair bgug;
        LexicalizedParser.printOptions(true, op);
        Timing tim = new Timing();
        // NOTE(review): this calls the TreeAnnotatorAndBinarizer version of the method, not the
        // same-named static method declared in this class - confirm that is intended.
        Triple<Treebank, Treebank, Treebank> treebanks = TreeAnnotatorAndBinarizer.getAnnotatedBinaryTreebankFromTreebank(trainTreebank, secondaryTrainTreebank, tuneTreebank, op);
        Timing.tick("done.");
        // Keep the untransformed training trees; they are passed to lexicon training below.
        Treebank trainTreebankRaw = trainTreebank;
        trainTreebank = treebanks.first();
        secondaryTrainTreebank = treebanks.second();
        tuneTreebank = treebanks.third();
        // Wrap each treebank with a LengthTreeFilter built from trainLengthLimit + 1.
        trainTreebank = new FilteringTreebank(trainTreebank, new LengthTreeFilter(op.trainOptions.trainLengthLimit + 1));
        if (secondaryTrainTreebank != null) {
            secondaryTrainTreebank = new FilteringTreebank(secondaryTrainTreebank, new LengthTreeFilter(op.trainOptions.trainLengthLimit + 1));
        }
        if (tuneTreebank != null) {
            tuneTreebank = new FilteringTreebank(tuneTreebank, new LengthTreeFilter(op.trainOptions.trainLengthLimit + 1));
        }
        if (op.trainOptions.predictSplits) {
            // The splitting extractor produces grammars, lexicon and all three indices itself.
            SplittingGrammarExtractor extractor = new SplittingGrammarExtractor(op);
            log.info("Extracting PCFG...");
            if (secondaryTrainTreebank == null) {
                extractor.extract(trainTreebank);
            } else {
                extractor.extract(trainTreebank, 1.0, secondaryTrainTreebank, weight);
            }
            bgug = extractor.bgug;
            lex = extractor.lex;
            stateIndex = extractor.stateIndex;
            wordIndex = extractor.wordIndex;
            tagIndex = extractor.tagIndex;
            Timing.tick("done.");
        } else {
            // Standard path: fresh indices, grammar extraction, then separate lexicon training.
            stateIndex = new HashIndex<String>();
            wordIndex = new HashIndex();
            tagIndex = new HashIndex();
            BinaryGrammarExtractor bgExtractor = new BinaryGrammarExtractor(op, stateIndex);
            log.info("Extracting PCFG...");
            bgug = secondaryTrainTreebank == null ? (Pair)bgExtractor.extract(trainTreebank) : (Pair)bgExtractor.extract(trainTreebank, 1.0, secondaryTrainTreebank, weight);
            Timing.tick("done.");
            log.info("Extracting Lexicon...");
            lex = op.tlpParams.lex(op, wordIndex, tagIndex);
            // Training size handed to the lexicon: primary trees, plus weighted secondary
            // trees, plus one per extra tagged sentence.
            double trainSize = trainTreebank.size();
            if (secondaryTrainTreebank != null) {
                trainSize += (double)secondaryTrainTreebank.size() * weight;
            }
            if (extraTaggedWords != null) {
                trainSize += (double)extraTaggedWords.size();
            }
            lex.initializeTraining(trainSize);
            // The annotated trees are passed together with the raw (untransformed) treebank.
            lex.train((Collection<Tree>)trainTreebank, trainTreebankRaw);
            if (secondaryTrainTreebank != null) {
                lex.train((Collection<Tree>)secondaryTrainTreebank, weight);
            }
            if (extraTaggedWords != null) {
                for (List<TaggedWord> sentence : extraTaggedWords) {
                    lex.trainUnannotated(sentence, 1.0);
                }
            }
            lex.finishTraining();
            Timing.tick("done.");
        }
        if (op.trainOptions.ruleSmoothing) {
            log.info("Smoothing PCFG...");
            LinearGrammarSmoother smoother = new LinearGrammarSmoother(op.trainOptions, stateIndex, tagIndex);
            bgug = (Pair)smoother.apply(bgug);
            Timing.tick("done.");
        }
        if (compactor != null) {
            // Compaction returns a new state index along with replacement grammars.
            log.info("Compacting grammar...");
            Triple<Index<String>, UnaryGrammar, BinaryGrammar> compacted = compactor.compactGrammar(bgug, stateIndex);
            stateIndex = compacted.first();
            bgug.setFirst(compacted.second());
            bgug.setSecond(compacted.third());
            Timing.tick("done.");
        }
        log.info("Compiling grammar...");
        BinaryGrammar bg = (BinaryGrammar)bgug.second;
        bg.splitRules();
        UnaryGrammar ug = (UnaryGrammar)bgug.first;
        ug.purgeRules();
        Timing.tick("done");
        // The dependency grammar stays null unless op.doDep is set.
        DependencyGrammar dg = null;
        if (op.doDep) {
            log.info("Extracting Dependencies...");
            MLEDependencyGrammarExtractor dgExtractor = new MLEDependencyGrammarExtractor(op, wordIndex, tagIndex);
            dg = secondaryTrainTreebank == null ? (DependencyGrammar)dgExtractor.extract(trainTreebank) : (DependencyGrammar)dgExtractor.extract(trainTreebank, 1.0, secondaryTrainTreebank, weight);
            Timing.tick("done.");
            if (tuneTreebank != null) {
                log.info("Tuning Dependency Model...");
                dg.setLexicon(lex);
                dg.tune(tuneTreebank);
                Timing.tick("done.");
            }
        }
        log.info("Done training parser.");
        if (op.trainOptions.trainTreeFile != null) {
            try {
                log.info("Writing out binary trees to " + op.trainOptions.trainTreeFile + "...");
                // NOTE(review): both treebanks are written to the same file name, so the second
                // write presumably replaces the first, and the secondary treebank may be null
                // here - confirm the intended behaviour.
                IOUtils.writeObjectToFile((Object)trainTreebank, op.trainOptions.trainTreeFile);
                IOUtils.writeObjectToFile((Object)secondaryTrainTreebank, op.trainOptions.trainTreeFile);
                Timing.tick("done.");
            }
            catch (Exception e) {
                // Best effort: a failed dump does not abort training. The exception itself is not logged.
                log.info("Problem writing out binary trees.");
            }
        }
        return new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op);
    }

    /**
     * Forwards the given option flags to this parser's options object.
     *
     * @param flags the flags handed, unchanged, to {@code op.setOptions}
     */
    @Override
    public void setOptionFlags(String ... flags) {
        final Options parserOptions = this.op;
        parserOptions.setOptions(flags);
    }

    /**
     * Command-line entry point. Obtains a parser (by training on a treebank, reading a text
     * grammar, or loading a serialized model), optionally saves it, and then either evaluates
     * it on a test treebank, parses the remaining file arguments, or parses the language
     * pack's default test sentence when no arguments remain.
     *
     * <p>Leading arguments that start with {@code -} (other than a bare {@code -}) are consumed
     * as options. Flags not recognised here are handed to {@code op.setOptionOrWarn} and are
     * also remembered so they can be passed to {@code loadModel} when a serialized parser is
     * loaded.
     *
     * @param args command-line arguments; when empty, a usage message is logged and the
     *             method returns
     * @throws IllegalArgumentException if an option that needs a value is the last argument,
     *         or if loading the serialized parser reports one
     * @throws RuntimeException if the {@code -tLPP} class or the tokenizer factory cannot be
     *         instantiated, or if a tune/test file filter was given but neither its own path
     *         nor a training path is available
     */
    public static void main(String[] args) {
        LexicalizedParser lp;
        boolean train = false;
        boolean saveToSerializedFile = false;
        boolean saveToTextFile = false;
        String serializedInputFileOrUrl = null;
        String textInputFileOrUrl = null;
        String serializedOutputFileOrUrl = null;
        String textOutputFileOrUrl = null;
        String treebankPath = null;
        MemoryTreebank testTreebank = null;
        MemoryTreebank tuneTreebank = null;
        String testPath = null;
        FileFilter testFilter = null;
        String tunePath = null;
        FileFilter tuneFilter = null;
        FileFilter trainFilter = null;
        String secondaryTreebankPath = null;
        double secondaryTreebankWeight = 1.0;
        FileFilter secondaryTrainFilter = null;
        TokenizerFactory tokenizerFactory = null;
        String tokenizerOptions = null;
        String tokenizerFactoryClass = null;
        String tokenizerMethod = null;
        boolean tokenized = false;
        Function escaper = null;
        String tagDelimiter = null;
        String sentenceDelimiter = null;
        String elementDelimiter = null;
        int argIndex = 0;
        if (args.length < 1) {
            log.info("Basic usage (see Javadoc for more): java edu.stanford.nlp.parser.lexparser.LexicalizedParser parserFileOrUrl filename*");
            return;
        }
        Options op = new Options();
        List<String> optionArgs = new ArrayList<String>();
        String encoding = null;
        // startsWith (rather than charAt(0)) so that an empty-string argument ends option
        // parsing instead of throwing StringIndexOutOfBoundsException.
        while (argIndex < args.length && args[argIndex].startsWith("-") && !args[argIndex].equals("-")) {
            final String flag = args[argIndex];
            if (flag.equalsIgnoreCase("-train") || flag.equalsIgnoreCase("-trainTreebank")) {
                train = true;
                Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-train");
                argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
                treebankPath = treebankDescription.first();
                trainFilter = treebankDescription.second();
                continue;
            }
            if (flag.equalsIgnoreCase("-train2")) {
                Triple<String, FileFilter, Double> treebankDescription = ArgUtils.getWeightedTreebankDescription(args, argIndex, "-train2");
                argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
                secondaryTreebankPath = treebankDescription.first();
                secondaryTrainFilter = treebankDescription.second();
                secondaryTreebankWeight = treebankDescription.third();
                continue;
            }
            // Without a following class name, -tLPP falls through to the generic option handler.
            if (flag.equalsIgnoreCase("-tLPP") && argIndex + 1 < args.length) {
                try {
                    op.tlpParams = (TreebankLangParserParams)Class.forName(args[argIndex + 1]).getDeclaredConstructor(new Class[0]).newInstance(new Object[0]);
                }
                catch (ClassNotFoundException e) {
                    log.info("Class not found: " + args[argIndex + 1]);
                    throw new RuntimeException(e);
                }
                catch (NoSuchMethodException e) {
                    log.info("Method not found: " + args[argIndex + 1]);
                    throw new RuntimeException(e);
                }
                catch (InstantiationException | InvocationTargetException e) {
                    log.info("Couldn't instantiate: " + args[argIndex + 1] + ": " + e.toString());
                    throw new RuntimeException(e);
                }
                catch (IllegalAccessException e) {
                    log.info("Illegal access" + e);
                    throw new RuntimeException(e);
                }
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-encoding")) {
                // Applied immediately here and again after the parser is obtained (see below),
                // because loading a model replaces op.
                encoding = LexicalizedParser.requiredOptionValue(args, argIndex);
                op.tlpParams.setInputEncoding(encoding);
                op.tlpParams.setOutputEncoding(encoding);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-tokenized")) {
                tokenized = true;
                ++argIndex;
                continue;
            }
            if (flag.equalsIgnoreCase("-escaper")) {
                // Fetch the class name outside the try: a missing value must not be reported
                // as an instantiation failure (the handler itself re-reads the name).
                String escaperClass = LexicalizedParser.requiredOptionValue(args, argIndex);
                try {
                    escaper = (Function)ReflectionLoading.loadByReflection(escaperClass, new Object[0]);
                }
                catch (Exception e) {
                    // Best effort: parsing continues without an escaper.
                    log.info("Couldn't instantiate escaper " + escaperClass + ": " + e);
                }
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-tokenizerOptions")) {
                tokenizerOptions = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-tokenizerFactory")) {
                tokenizerFactoryClass = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-tokenizerMethod")) {
                tokenizerMethod = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-sentences")) {
                sentenceDelimiter = LexicalizedParser.requiredOptionValue(args, argIndex);
                if (sentenceDelimiter.equalsIgnoreCase("newline")) {
                    sentenceDelimiter = "\n";
                }
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-parseInside")) {
                elementDelimiter = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-tagSeparator")) {
                tagDelimiter = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-loadFromSerializedFile") || flag.equalsIgnoreCase("-model")) {
                serializedInputFileOrUrl = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-loadFromTextFile")) {
                textInputFileOrUrl = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-saveToSerializedFile")) {
                saveToSerializedFile = true;
                if (ArgUtils.numSubArgs(args, argIndex) < 1) {
                    log.info("Missing path: -saveToSerialized filename");
                    // No path follows, so only the flag itself is consumed; skipping two
                    // slots here would silently drop whatever option comes next.
                    // NOTE(review): assumes numSubArgs counts the non-flag arguments that
                    // follow the flag -- confirm against ArgUtils.
                    ++argIndex;
                } else {
                    serializedOutputFileOrUrl = args[argIndex + 1];
                    argIndex += 2;
                }
                continue;
            }
            if (flag.equalsIgnoreCase("-saveToTextFile")) {
                saveToTextFile = true;
                textOutputFileOrUrl = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-saveTrainTrees")) {
                op.trainOptions.trainTreeFile = LexicalizedParser.requiredOptionValue(args, argIndex);
                argIndex += 2;
                continue;
            }
            if (flag.equalsIgnoreCase("-treebank") || flag.equalsIgnoreCase("-testTreebank") || flag.equalsIgnoreCase("-test")) {
                Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-test");
                argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
                testPath = treebankDescription.first();
                testFilter = treebankDescription.second();
                continue;
            }
            if (flag.equalsIgnoreCase("-tune")) {
                Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-tune");
                argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
                tunePath = treebankDescription.first();
                tuneFilter = treebankDescription.second();
                continue;
            }
            // Anything else is handled by Options; keep the consumed arguments so they can be
            // passed along when a serialized parser is loaded.
            int oldIndex = argIndex;
            argIndex = op.setOptionOrWarn(args, argIndex);
            optionArgs.addAll(Arrays.asList(args).subList(oldIndex, argIndex));
        }
        if (tuneFilter != null || tunePath != null) {
            if (tunePath == null) {
                if (treebankPath == null) {
                    throw new RuntimeException("No tune treebank path specified...");
                }
                log.info("No tune treebank path specified.  Using train path: \"" + treebankPath + '\"');
                tunePath = treebankPath;
            }
            tuneTreebank = op.tlpParams.testMemoryTreebank();
            tuneTreebank.loadPath(tunePath, tuneFilter);
        }
        // The invocation is always logged when training, otherwise only in verbose mode.
        if (train || op.testOptions.verbose) {
            StringUtils.logInvocationString(log, args);
        }
        if (train) {
            ExactGrammarCompactor compactor = null;
            if (op.trainOptions.compactGrammar() == 3) {
                compactor = new ExactGrammarCompactor(op, false, false);
            }
            Treebank trainTreebank = LexicalizedParser.makeTreebank(treebankPath, op, trainFilter);
            DiskTreebank secondaryTrainTreebank = null;
            if (secondaryTreebankPath != null) {
                secondaryTrainTreebank = LexicalizedParser.makeSecondaryTreebank(secondaryTreebankPath, op, secondaryTrainFilter);
            }
            ArrayList<List<TaggedWord>> extraTaggedWords = null;
            if (op.trainOptions.taggedFiles != null) {
                extraTaggedWords = new ArrayList<List<TaggedWord>>();
                List<TaggedFileRecord> fileRecords = TaggedFileRecord.createRecords(new Properties(), op.trainOptions.taggedFiles);
                for (TaggedFileRecord record : fileRecords) {
                    for (List sentence : record.reader()) {
                        extraTaggedWords.add(sentence);
                    }
                }
            }
            lp = LexicalizedParser.getParserFromTreebank(trainTreebank, secondaryTrainTreebank, secondaryTreebankWeight, compactor, op, tuneTreebank, extraTaggedWords);
        } else if (textInputFileOrUrl != null) {
            lp = LexicalizedParser.getParserFromTextFile(textInputFileOrUrl, op);
        } else {
            // With no explicit model option, the first remaining argument names the model.
            if (serializedInputFileOrUrl == null && argIndex < args.length) {
                serializedInputFileOrUrl = args[argIndex];
                ++argIndex;
            }
            if (serializedInputFileOrUrl == null) {
                log.info("No grammar specified, exiting...");
                return;
            }
            String[] extraArgs = optionArgs.toArray(new String[0]);
            try {
                lp = LexicalizedParser.loadModel(serializedInputFileOrUrl, op, extraArgs);
                // From here on use the options that belong to the loaded parser.
                op = lp.op;
            }
            catch (IllegalArgumentException e) {
                log.info("Error loading parser, exiting...");
                throw e;
            }
        }
        if (tokenizerFactoryClass != null || tokenizerOptions != null) {
            try {
                if (tokenizerFactoryClass != null) {
                    Method factoryMethod;
                    Class clazz = (Class)ErasureUtils.uncheckedCast(Class.forName(tokenizerFactoryClass));
                    if (tokenizerOptions != null) {
                        factoryMethod = clazz.getMethod(tokenizerMethod != null ? tokenizerMethod : "newWordTokenizerFactory", String.class);
                        tokenizerFactory = (TokenizerFactory)ErasureUtils.uncheckedCast(factoryMethod.invoke(null, tokenizerOptions));
                    } else {
                        factoryMethod = clazz.getMethod(tokenizerMethod != null ? tokenizerMethod : "newTokenizerFactory", new Class[0]);
                        tokenizerFactory = (TokenizerFactory)ErasureUtils.uncheckedCast(factoryMethod.invoke(null, new Object[0]));
                    }
                } else {
                    // Only options were given: configure the language pack's own factory.
                    tokenizerFactory = lp.op.langpack().getTokenizerFactory();
                    tokenizerFactory.setOptions(tokenizerOptions);
                }
            }
            catch (ClassNotFoundException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
                log.info("Couldn't instantiate TokenizerFactory " + tokenizerFactoryClass + " with options " + tokenizerOptions);
                throw new RuntimeException(e);
            }
        }
        // Re-apply the encoding: op may now be the loaded parser's options object.
        if (encoding != null) {
            op.tlpParams.setInputEncoding(encoding);
            op.tlpParams.setOutputEncoding(encoding);
        }
        if (testFilter != null || testPath != null) {
            if (testPath == null) {
                if (treebankPath == null) {
                    throw new RuntimeException("No test treebank path specified...");
                }
                log.info("No test treebank path specified.  Using train path: \"" + treebankPath + '\"');
                testPath = treebankPath;
            }
            testTreebank = op.tlpParams.testMemoryTreebank();
            testTreebank.loadPath(testPath, testFilter);
        }
        op.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(op.tlpParams.sisterSplitters()));
        if (saveToTextFile) {
            if (textOutputFileOrUrl != null) {
                lp.saveParserToTextFile(textOutputFileOrUrl);
            } else {
                log.info("Usage: must specify a text grammar output path");
            }
        }
        if (saveToSerializedFile) {
            if (serializedOutputFileOrUrl != null) {
                lp.saveParserToSerialized(serializedOutputFileOrUrl);
            } else if (textOutputFileOrUrl == null && testTreebank == null) {
                log.info("usage: java edu.stanford.nlp.parser.lexparser.LexicalizedParser -train trainFilesPath [fileRange] -saveToSerializedFile serializedParserFilename");
            }
        }
        if (op.testOptions.verbose || train) {
            String lexNumRules = lp.lex != null ? Integer.toString(lp.lex.numRules()) : "";
            log.info("Grammar\tStates\tTags\tWords\tUnaryR\tBinaryR\tTaggings");
            log.info("Grammar\t" + lp.stateIndex.size() + '\t' + lp.tagIndex.size() + '\t' + lp.wordIndex.size() + '\t' + (lp.ug != null ? Integer.valueOf(lp.ug.numRules()) : "") + '\t' + (lp.bg != null ? Integer.valueOf(lp.bg.numRules()) : "") + '\t' + lexNumRules);
            log.info("ParserPack is " + op.tlpParams.getClass().getName());
            // The lexicon is treated as optional just above, so guard the dereference here too.
            log.info("Lexicon is " + (lp.lex != null ? lp.lex.getClass().getName() : "null"));
            if (op.testOptions.verbose) {
                log.info("Tags are: " + lp.tagIndex);
            }
            LexicalizedParser.printOptions(false, op);
        }
        if (testTreebank != null) {
            EvaluateTreebank evaluator = new EvaluateTreebank(lp);
            evaluator.testOnTreebank(testTreebank);
        } else if (argIndex >= args.length) {
            // Nothing left to parse: fall back to the default test sentence.
            PrintWriter pwOut = op.tlpParams.pw();
            PrintWriter pwErr = op.tlpParams.pw(System.err);
            ParserQuery pq = lp.parserQuery();
            if (pq.parse(op.tlpParams.defaultTestSentence())) {
                lp.getTreePrint().printTree(pq.getBestParse(), pwOut);
            } else {
                pwErr.println("Error. Can't parse test sentence: " + op.tlpParams.defaultTestSentence());
            }
        } else {
            ParseFiles.parseFiles(args, argIndex, tokenized, tokenizerFactory, elementDelimiter, sentenceDelimiter, escaper, tagDelimiter, op, lp.getTreePrint(), lp);
        }
    }

    /**
     * Returns the argument that follows the flag at {@code flagIndex}.
     *
     * @param args the full command-line argument array
     * @param flagIndex index of the flag whose value is wanted
     * @return {@code args[flagIndex + 1]}
     * @throws IllegalArgumentException if the flag is the last argument
     */
    private static String requiredOptionValue(String[] args, int flagIndex) {
        if (flagIndex + 1 >= args.length) {
            throw new IllegalArgumentException("Missing value for option " + args[flagIndex]);
        }
        return args[flagIndex + 1];
    }
}

