/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.sspace.mains;

import edu.ucla.sspace.common.ArgOptions;
import edu.ucla.sspace.dependency.CoNLLDependencyExtractor;
import edu.ucla.sspace.dependency.DependencyExtractorManager;
import edu.ucla.sspace.dependency.WaCKyDependencyExtractor;
import edu.ucla.sspace.mains.GenericMain;
import edu.ucla.sspace.text.DependencyFileDocumentIterator;
import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.Stemmer;
import edu.ucla.sspace.text.TokenFilter;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;

/**
 * A {@link GenericMain} variant for algorithms that read dependency parsed
 * corpora.  It registers three extra command-line options ({@code -G},
 * {@code -D} and {@code -H}), installs the requested dependency extractor in
 * the {@link DependencyExtractorManager}, and supplies documents through
 * {@link DependencyFileDocumentIterator} instances.
 *
 * <p>NOTE(review): several help strings in this class contain spelling errors;
 * they are kept exactly as they were so that the printed output is unchanged.
 */
public abstract class DependencyGenericMain
extends GenericMain {
    /**
     * Help text describing the supported dependency parse formats; printed as
     * part of {@link #usage()}.
     */
    static final String DEPENDENCY_EXTRACTOR_DESCRIPTION = "This semantic space algorithm operates only on dependency parsed corpora.  The\ncorpora must be formated in way recognized by one of extractors.  The currently\nsupported dependency extractors are CoNLL and WaCKy.  One of these may be\nspecifed with the -D, --dependencyParseFormat option.  The CoNLL extractor\nsupports optional configuration with the -G, --configFile option to indicate the\norder of the fields.";

    /**
     * Registers the dependency parsing options on the supplied option set.
     *
     * @param argOptions the option set that receives the {@code -G},
     *        {@code -D} and {@code -H} options
     */
    @Override
    public void addExtraOptions(ArgOptions argOptions) {
        // All three options are listed under the same help group.
        final String optionGroup = "Advanced Dependency Parsing";

        argOptions.addOption(
                'G', "configFile",
                "XML configuration file for the format of a dependency parse",
                true, "FILE", optionGroup);
        argOptions.addOption(
                'D', "dependencyParseFormat",
                "the name of the dependency parsed format for the corpus (defalt: CoNLL)",
                true, "STR", optionGroup);
        // -H is a bare flag: it takes no value, only its presence is checked.
        argOptions.addOption(
                'H', "discardHeaderLines",
                "If true, the first line in every dependency parse document will be discarded.  This is useful if the first line corresponds to a document or instance identifier and not acually part of the parsed text.  (Default: false)",
                false, null, optionGroup);
    }

    /**
     * Builds the dependency extractor selected by the
     * {@code dependencyParseFormat} option (falling back to {@code "CoNLL"})
     * and registers it with the {@link DependencyExtractorManager}.
     *
     * @throws IllegalArgumentException if the format name is neither
     *         {@code "CoNLL"} nor {@code "WaCKy"}, or if {@code -G} is
     *         combined with the {@code "WaCKy"} format
     */
    protected void setupDependencyExtractor() {
        // The token filter is only loaded when one was requested.
        TokenFilter filter = null;
        if (this.argOptions.hasOption("tokenFilter")) {
            filter = TokenFilter.loadFromSpecification(this.argOptions.getStringOption('F'));
        }

        Stemmer stemmer = this.argOptions.getObjectOption("stemmingAlgorithm", null);
        String format = this.argOptions.getStringOption("dependencyParseFormat", "CoNLL");

        if (format.equals("CoNLL")) {
            // CoNLL optionally takes a configuration file through -G.
            CoNLLDependencyExtractor conllExtractor;
            if (this.argOptions.hasOption('G')) {
                conllExtractor = new CoNLLDependencyExtractor(this.argOptions.getStringOption('G'), filter, stemmer);
            } else {
                conllExtractor = new CoNLLDependencyExtractor(filter, stemmer);
            }
            DependencyExtractorManager.addExtractor("CoNLL", conllExtractor, true);
            return;
        }

        if (!format.equals("WaCKy")) {
            throw new IllegalArgumentException("Unrecognized dependency parsed format: " + format);
        }

        // Only WaCKy remains; it cannot be combined with -G.
        if (this.argOptions.hasOption('G')) {
            throw new IllegalArgumentException("WaCKy does not support configuration with -G");
        }
        WaCKyDependencyExtractor wackyExtractor = new WaCKyDependencyExtractor(filter, stemmer);
        DependencyExtractorManager.addExtractor("WaCKy", wackyExtractor, true);
    }

    /**
     * Not supported by this class.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    protected void addFileIterators(Collection<Iterator<Document>> collection, String[] stringArray) throws IOException {
        throw new UnsupportedOperationException("A file based document iterator does not exist");
    }

    /**
     * Appends one {@link DependencyFileDocumentIterator} per entry of
     * {@code stringArray} to {@code collection}.  Each iterator is told
     * whether the {@code -H} flag was given.
     *
     * @param collection the collection that receives the new iterators
     * @param stringArray the values handed to each iterator's constructor
     * @throws IOException declared by the overridden method; may be raised
     *         while constructing an iterator
     */
    @Override
    protected void addDocIterators(Collection<Iterator<Document>> collection, String[] stringArray) throws IOException {
        final boolean discardHeader = this.argOptions.hasOption('H');
        for (int i = 0; i < stringArray.length; ++i) {
            DependencyFileDocumentIterator docIterator = new DependencyFileDocumentIterator(stringArray[i], discardHeader);
            collection.add(docIterator);
        }
    }

    /**
     * Prints the usage message to standard output: the command line synopsis,
     * the formatted options, any algorithm specific text, and the fixed help
     * sections, each separated by a blank line.
     */
    @Override
    protected void usage() {
        String specifics = this.getAlgorithmSpecifics();
        final String sectionBreak = "\n\n";

        StringBuilder message = new StringBuilder();
        message.append("usage: java ");
        message.append(this.getClass().getName());
        message.append(" [options] <output-dir>\n");
        message.append(this.argOptions.prettyPrint());

        // Algorithm specific notes are skipped entirely when empty.
        if (specifics.length() != 0) {
            message.append("\n");
            message.append(specifics);
        }

        message.append(sectionBreak);
        message.append("token configuration lists sets of files that contain tokens to be included or\nexcluded.  The behavior, \"include\" or \"exclude\" is specified\nfirst, followed by one or more file names, each separated by colons.\nMultiple behaviors may be specified one after the other using a ','\ncharacter to separate them.  For example, a typicaly configuration may\nlook like: include=top-tokens.txt:test-words.txt,exclude=stop-words.txt\nNote behaviors are applied in the order they are presented on the command-line.");
        message.append(sectionBreak);
        message.append("Tokens can be stemmed for various languages using wrappers for the snoball\nstemming algorithms.  Each language has it's own stemmer, following a simple naming\n convention: LanguagenameStemmer.");
        message.append(sectionBreak);
        message.append("Semantic space files stored in one of four formats: text, sparse_text, binary\nsparse_binary.  The sparse versions should be used if the algorithm produces\nsemantic vectors in which more than half of the values are 0.  The sparse\nversions are much more compact for these types of semantic spaces and will be\nboth faster to read and write as well as be much smaller on disk.  Text formats\nare human readable but may take up more space.  Binary formats offer\nsignificantly better I/O performance.");
        message.append(sectionBreak);
        message.append(DEPENDENCY_EXTRACTOR_DESCRIPTION);
        message.append(sectionBreak);
        message.append("Send bug reports or comments to <s-space-research-dev@googlegroups.com>.");

        System.out.println(message.toString());
    }
}

