/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.sspace.tools;

import edu.ucla.sspace.common.ArgOptions;
import edu.ucla.sspace.dependency.CoNLLDependencyExtractor;
import edu.ucla.sspace.dependency.DependencyExtractor;
import edu.ucla.sspace.dependency.DependencyTreeNode;
import edu.ucla.sspace.dependency.WaCKyDependencyExtractor;
import edu.ucla.sspace.text.DependencyFileDocumentIterator;
import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.Stemmer;
import edu.ucla.sspace.text.TokenFilter;
import edu.ucla.sspace.util.LoggerUtil;
import edu.ucla.sspace.util.TrieMap;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Command-line tool that counts how often each token occurs in one or more
 * dependency-parsed corpus files and writes the counts to an output file,
 * one {@code <token> <count>} pair per line.
 *
 * <p>Tokens are taken from the nodes of the dependency trees produced by the
 * configured {@link DependencyExtractor}.  Each token may optionally be
 * lower-cased and/or suffixed with {@code "-" + part-of-speech} before it is
 * counted.
 */
public class DepTokenCounter {
    /** Number of processed tokens between two progress log messages. */
    private static final int UPDATE_INTERVAL = 10000;

    private static final Logger LOGGER = Logger.getLogger(DepTokenCounter.class.getName());

    /** Maps each (possibly normalized) token to the number of times it was seen. */
    private final Map<String, Integer> tokenToCount;

    /** Whether each token is lower-cased before being counted. */
    private final boolean doLowerCasing;

    /** Whether the part-of-speech tag is appended to each token before counting. */
    private final boolean doPos;

    /** Reads dependency trees from the documents handed to {@link #process}. */
    private final DependencyExtractor extractor;

    /**
     * Creates a counter with an empty set of counts.
     *
     * @param doLowerCasing {@code true} to lower-case every token before counting
     * @param doPos {@code true} to append {@code "-"} and the node's
     *        part-of-speech tag to every token before counting
     * @param extractor the extractor used to read dependency trees from documents
     */
    public DepTokenCounter(boolean doLowerCasing, boolean doPos, DependencyExtractor extractor) {
        this.doLowerCasing = doLowerCasing;
        this.doPos = doPos;
        this.extractor = extractor;
        this.tokenToCount = new TrieMap<Integer>();
    }

    /**
     * Returns a read-only view of the counts gathered so far.
     *
     * @return an unmodifiable view of the token-to-count mapping
     */
    public Map<String, Integer> getTokenCounts() {
        return Collections.unmodifiableMap(this.tokenToCount);
    }

    /**
     * Reads one dependency tree from every document supplied by the iterator
     * and adds the tokens of its nodes to the running counts.
     *
     * @param iterator the documents to process
     * @throws IOException if reading a dependency tree fails
     */
    private void process(Iterator<Document> iterator) throws IOException {
        long numTokens = 0L;
        while (iterator.hasNext()) {
            Document document = iterator.next();
            DependencyTreeNode[] nodes = this.extractor.readNextTree(document.reader());
            // NOTE(review): the extractor presumably yields null when a document
            // holds no tree (e.g. it is empty) -- confirm against its contract.
            // Skipping such a document avoids a NullPointerException that would
            // abort the whole run.
            if (nodes == null) {
                continue;
            }
            for (DependencyTreeNode node : nodes) {
                String token = node.word();
                if (this.doLowerCasing) {
                    // NOTE(review): uses the JVM default locale; consider a fixed
                    // locale if results must be identical across machines.
                    token = token.toLowerCase();
                }
                if (this.doPos) {
                    token = token + "-" + node.pos();
                }
                Integer count = this.tokenToCount.get(token);
                this.tokenToCount.put(token, count == null ? 1 : 1 + count);
                if (++numTokens % UPDATE_INTERVAL == 0L) {
                    LOGGER.fine("Processed " + numTokens + " tokens.  Currently " + this.tokenToCount.size() + " unique tokens");
                }
            }
        }
    }

    /**
     * Entry point.  The first positional argument is the output file; every
     * following positional argument is a dependency-parsed input file.
     *
     * @param args command-line options and positional arguments
     * @throws IllegalArgumentException if the requested dependency parse
     *         format is neither {@code CoNLL} nor {@code WaCKy}
     * @throws IOException if reading an input or writing the output fails
     * @throws Exception for any other failure reported by option parsing or
     *         by the helpers used here
     */
    public static void main(String[] args) throws Exception {
        ArgOptions argOptions = new ArgOptions();
        argOptions.addOption('Z', "stemmingAlgorithm", "specifices the stemming algorithm to use on tokens while iterating.  (default: none)", true, "CLASSNAME", "Tokenizing Options");
        argOptions.addOption('F', "tokenFilter", "filters to apply to the input token stream", true, "FILTER_SPEC", "Tokenizing Options");
        argOptions.addOption('L', "lowerCase", "lower-cases each token after all other filtering has been applied", false, null, "Tokenizing Options");
        argOptions.addOption('P', "partOfSpeech", "use part of speech tags for each token.", false, null, "Tokenizing Options");
        argOptions.addOption('H', "discardHeader", "If true, the first line of each dependency document will be discarded.", false, null, "Tokenizing Options");
        argOptions.addOption('v', "verbose", "Print verbose output about counting status", false, null, "Optional");
        argOptions.addOption('D', "dependencyParseFormat", "the name of the dependency parsed format for the corpus (defalt: CoNLL)", true, "STR", "Advanced Dependency Parsing");
        argOptions.parseOptions(args);

        // An output file plus at least one input file are required.
        if (argOptions.numPositionalArgs() < 2) {
            System.out.println("usage: java DepTokenCounter [options] <output-file> <input-file> [<input-file>]*\n"
                    + argOptions.prettyPrint() + "\n\n"
                    + "token configuration lists sets of files that contain tokens to be included or\n"
                    + "excluded.  The behavior, \"include\" or \"exclude\" is specified\n"
                    + "first, followed by one or more file names, each separated by colons.\n"
                    + "Multiple behaviors may be specified one after the other using a ','\n"
                    + "character to separate them.  For example, a typicaly configuration may\n"
                    + "look like: include=top-tokens.txt:test-words.txt,exclude=stop-words.txt\n"
                    + "Note behaviors are applied in the order they are presented on the command-line.");
            return;
        }
        if (argOptions.hasOption("verbose")) {
            LoggerUtil.setLevel(Level.FINE);
        }

        boolean doLowerCasing = argOptions.hasOption("lowerCase");
        boolean doPos = argOptions.hasOption("partOfSpeech");
        boolean discardHeader = argOptions.hasOption('H');
        TokenFilter tokenFilter = argOptions.hasOption("tokenFilter") ? TokenFilter.loadFromSpecification(argOptions.getStringOption('F')) : null;
        Stemmer stemmer = argOptions.getObjectOption("stemmingAlgorithm", null);

        // Select the extractor by format name.  An unknown name is rejected up
        // front instead of leaving the extractor null and failing later with
        // an unexplained NullPointerException while processing the first file.
        String format = argOptions.getStringOption("dependencyParseFormat", "CoNLL");
        DependencyExtractor extractor;
        if ("CoNLL".equals(format)) {
            extractor = new CoNLLDependencyExtractor(tokenFilter, stemmer);
        } else if ("WaCKy".equals(format)) {
            extractor = new WaCKyDependencyExtractor(tokenFilter, stemmer);
        } else {
            throw new IllegalArgumentException("Unrecognized dependency parse format: " + format);
        }

        DepTokenCounter counter = new DepTokenCounter(doLowerCasing, doPos, extractor);
        // Positional argument 0 is the output file; inputs start at index 1.
        for (int i = 1; i < argOptions.numPositionalArgs(); ++i) {
            counter.process(new DependencyFileDocumentIterator(argOptions.getPositionalArg(i), discardHeader));
        }

        String outputFile = argOptions.getPositionalArg(0);
        PrintWriter writer = new PrintWriter(outputFile);
        try {
            for (Map.Entry<String, Integer> entry : counter.tokenToCount.entrySet()) {
                writer.printf("%s %d\n", entry.getKey(), entry.getValue());
            }
            // PrintWriter never throws on write failures; checkError() flushes
            // and reports whether one occurred so it is not silently lost.
            if (writer.checkError()) {
                throw new IOException("Failed to write token counts to " + outputFile);
            }
        } finally {
            // Release the file handle even when writing fails part-way.
            writer.close();
        }
    }
}

