/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.sspace.mains;

import edu.ucla.sspace.common.ArgOptions;
import edu.ucla.sspace.common.SemanticSpace;
import edu.ucla.sspace.common.SemanticSpaceIO;
import edu.ucla.sspace.text.CorpusReader;
import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.FileListDocumentIterator;
import edu.ucla.sspace.text.IteratorFactory;
import edu.ucla.sspace.text.OneLinePerDocumentIterator;
import edu.ucla.sspace.util.CombinedIterator;
import edu.ucla.sspace.util.LimitedIterator;
import edu.ucla.sspace.util.LoggerUtil;
import edu.ucla.sspace.util.ReflectionUtil;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

public abstract class GenericMain {
    public static final String EXT = ".sspace";
    private static final Logger LOGGER = Logger.getLogger(GenericMain.class.getName());
    protected boolean verbose;
    protected final ArgOptions argOptions;
    protected final boolean isMultiThreaded;

    public GenericMain() {
        this(true);
    }

    public GenericMain(boolean bl) {
        this.isMultiThreaded = bl;
        this.argOptions = this.setupOptions();
        this.verbose = false;
    }

    protected abstract SemanticSpace getSpace();

    protected String getAlgorithmSpecifics() {
        return "";
    }

    protected void usage() {
        String string = this.getAlgorithmSpecifics();
        System.out.println("usage: java " + this.getClass().getName() + " [options] <output-dir>\n" + this.argOptions.prettyPrint() + (string.length() == 0 ? "" : "\n" + string) + "\n" + "The compound word option specifies a file whose contents are compount tokens,\ne.g. white house.  Each compound token should be specified on its own line.\nCompount tokenization is greedy and will select the longest compound token\npresent.  For example if \"bar exam\" and \"California bar exam\" are both\ncompound tokens, the latter will always be returned as a single token, rather\nthan returning the two tokens \"California\" and \"bar exam\"." + "\n\n" + "token configuration lists sets of files that contain tokens to be included or\nexcluded.  The behavior, \"include\" or \"exclude\" is specified\nfirst, followed by one or more file names, each separated by colons.\nMultiple behaviors may be specified one after the other using a ','\ncharacter to separate them.  For example, a typicaly configuration may\nlook like: include=top-tokens.txt:test-words.txt,exclude=stop-words.txt\nNote behaviors are applied in the order they are presented on the command-line." + "\n\n" + "Tokens can be stemmed for various languages using wrappers for the snoball\nstemming algorithms.  Each language has it's own stemmer, following a simple naming\n convention: LanguagenameStemmer." + "\n\n" + "Semantic space files stored in one of four formats: text, sparse_text, binary\nsparse_binary.  The sparse versions should be used if the algorithm produces\nsemantic vectors in which more than half of the values are 0.  The sparse\nversions are much more compact for these types of semantic spaces and will be\nboth faster to read and write as well as be much smaller on disk.  Text formats\nare human readable but may take up more space.  Binary formats offer\nsignificantly better I/O performance." + "\n\n" + "Send bug reports or comments to <s-space-research-dev@googlegroups.com>.");
    }

    protected SemanticSpaceIO.SSpaceFormat getSpaceFormat() {
        return SemanticSpaceIO.SSpaceFormat.TEXT;
    }

    protected void addExtraOptions(ArgOptions argOptions) {
    }

    protected void handleExtraOptions() {
    }

    protected void postProcessing() {
    }

    protected Properties setupProperties() {
        Properties properties = System.getProperties();
        return properties;
    }

    protected ArgOptions setupOptions() {
        ArgOptions argOptions = new ArgOptions();
        argOptions.addOption('f', "fileList", "a list of document files", true, "FILE[,FILE...]", "Required (at least one of)");
        argOptions.addOption('d', "docFile", "a file where each line is a document", true, "FILE[,FILE...]", "Required (at least one of)");
        argOptions.addOption('R', "corpusReader", "Specifies a CorpusReader which will automatically parse the document files that are not in the formats expected by -f and -d.", true, "CLASSNAME,FILE[,FILE...]", "Required (at least one of)");
        argOptions.addOption('o', "outputFormat", "the .sspace format to use", true, "FORMAT", "Program Options");
        if (this.isMultiThreaded) {
            argOptions.addOption('t', "threads", "the number of threads to use", true, "INT", "Program Options");
        }
        argOptions.addOption('w', "overwrite", "specifies whether to overwrite the existing output", true, "BOOL", "Program Options");
        argOptions.addOption('v', "verbose", "prints verbose output", false, null, "Program Options");
        argOptions.addOption('Z', "stemmingAlgorithm", "specifices the stemming algorithm to use on tokens while iterating.  (default: none)", true, "CLASSNAME", "Tokenizing Options");
        argOptions.addOption('F', "tokenFilter", "filters to apply to the input token stream", true, "FILTER_SPEC", "Tokenizing Options");
        argOptions.addOption('C', "compoundWords", "a file where each line is a recognized compound word", true, "FILE", "Tokenizing Options");
        argOptions.addOption('z', "wordLimit", "Set the maximum number of words an document can return", true, "INT", "Tokenizing Options");
        this.addExtraOptions(argOptions);
        return argOptions;
    }

    protected Iterator<Document> getDocumentIterator() throws IOException {
        LinkedList<Iterator<Document>> linkedList = new LinkedList<Iterator<Document>>();
        if (this.argOptions.hasOption('R')) {
            this.addCorpusReaderIterators(linkedList, this.argOptions.getStringOption('R').split(","));
        }
        if (this.argOptions.hasOption('f')) {
            this.addFileIterators(linkedList, this.argOptions.getStringOption('f').split(","));
        }
        if (this.argOptions.hasOption('d')) {
            this.addDocIterators(linkedList, this.argOptions.getStringOption('d').split(","));
        }
        if (linkedList.size() == 0) {
            throw new Error("Must specify document sources");
        }
        CombinedIterator<Document> combinedIterator = new CombinedIterator<Document>(linkedList);
        if (this.argOptions.hasOption("docLimit")) {
            return new LimitedIterator<Document>(combinedIterator, this.argOptions.getIntOption("docLimit"));
        }
        return combinedIterator;
    }

    protected void addCorpusReaderIterators(Collection<Iterator<Document>> collection, String[] stringArray) throws IOException {
        CorpusReader corpusReader = (CorpusReader)ReflectionUtil.getObjectInstance(stringArray[0]);
        for (int i = 1; i < stringArray.length; ++i) {
            collection.add(corpusReader.read(new File(stringArray[0])));
        }
    }

    protected void addFileIterators(Collection<Iterator<Document>> collection, String[] stringArray) throws IOException {
        for (String string : stringArray) {
            collection.add(new FileListDocumentIterator(string));
        }
    }

    protected void addDocIterators(Collection<Iterator<Document>> collection, String[] stringArray) throws IOException {
        for (String string : stringArray) {
            collection.add(new OneLinePerDocumentIterator(string));
        }
    }

    public void run(String[] stringArray) throws Exception {
        int n;
        if (stringArray.length == 0) {
            this.usage();
            System.exit(1);
        }
        this.argOptions.parseOptions(stringArray);
        if (this.argOptions.numPositionalArgs() == 0) {
            throw new IllegalArgumentException("must specify output path");
        }
        boolean bl = this.verbose = this.argOptions.hasOption('v') || this.argOptions.hasOption("verbose");
        if (this.verbose) {
            LoggerUtil.setLevel(Level.FINE);
        }
        int n2 = n = this.isMultiThreaded ? Runtime.getRuntime().availableProcessors() : 1;
        if (this.argOptions.hasOption("threads")) {
            n = this.argOptions.getIntOption("threads");
        }
        boolean bl2 = true;
        if (this.argOptions.hasOption("overwrite")) {
            bl2 = this.argOptions.getBooleanOption("overwrite");
        }
        this.handleExtraOptions();
        Properties properties = this.setupProperties();
        if (this.argOptions.hasOption("tokenFilter")) {
            properties.setProperty("edu.ucla.sspace.text.TokenizerFactory.tokenFilter", this.argOptions.getStringOption("tokenFilter"));
        }
        if (this.argOptions.hasOption("stemmingAlgorithm")) {
            properties.setProperty("edu.ucla.sspace.text.TokenizerFactory.stemmer", this.argOptions.getStringOption("stemmingAlgorithm"));
        }
        if (this.argOptions.hasOption("compoundWords")) {
            properties.setProperty("edu.ucla.sspace.text.TokenizerFactory.compoundTokens", this.argOptions.getStringOption("compoundWords"));
        }
        if (this.argOptions.hasOption("wordLimit")) {
            properties.setProperty("edu.ucla.sspace.text.TokenizerFactory.tokenCountLimit", this.argOptions.getStringOption("wordLimit"));
        }
        IteratorFactory.setProperties(properties);
        SemanticSpace semanticSpace = this.getSpace();
        Iterator<Document> iterator = this.getDocumentIterator();
        this.processDocumentsAndSpace(semanticSpace, iterator, n, properties);
        File file = new File(this.argOptions.getPositionalArg(0));
        File file2 = null;
        if (file.isDirectory()) {
            file2 = bl2 ? new File(file, semanticSpace.getSpaceName() + EXT) : File.createTempFile(semanticSpace.getSpaceName(), EXT, file);
        } else if (file.exists() && !bl2) {
            File file3;
            String string = file.getName();
            int n3 = string.lastIndexOf(".");
            String string2 = n3 < 0 && n3 + 1 < string.length() ? "" : string.substring(n3);
            String string3 = string.substring(0, n3);
            if (string3.length() < 3) {
                string3 = string3 + Math.abs(Math.random() * 32767.0 * 10.0);
            }
            if ((file3 = file.getParentFile()) == null) {
                file3 = new File("");
            }
            System.out.println("base dir: " + file3);
            file2 = File.createTempFile(string3, string2, file3);
        } else {
            file2 = file;
        }
        System.out.println("output File: " + file2);
        long l = System.currentTimeMillis();
        this.saveSSpace(semanticSpace, file2);
        long l2 = System.currentTimeMillis();
        this.verbose("printed space in %.3f seconds", (double)(l2 - l) / 1000.0);
        this.postProcessing();
    }

    protected void saveSSpace(SemanticSpace semanticSpace, File file) throws IOException {
        SemanticSpaceIO.SSpaceFormat sSpaceFormat = this.argOptions.hasOption("outputFormat") ? SemanticSpaceIO.SSpaceFormat.valueOf(this.argOptions.getStringOption("outputFormat").toUpperCase()) : this.getSpaceFormat();
        SemanticSpaceIO.save(semanticSpace, file, sSpaceFormat);
    }

    protected void processDocumentsAndSpace(SemanticSpace semanticSpace, Iterator<Document> iterator, int n, Properties properties) throws Exception {
        this.parseDocumentsMultiThreaded(semanticSpace, iterator, n);
        long l = System.currentTimeMillis();
        semanticSpace.processSpace(properties);
        long l2 = System.currentTimeMillis();
        this.verbose("processed space in %.3f seconds", (double)(l2 - l) / 1000.0);
    }

    protected void parseDocumentsSingleThreaded(SemanticSpace semanticSpace, Iterator<Document> iterator) throws IOException {
        long l = System.currentTimeMillis();
        int n = 0;
        while (iterator.hasNext()) {
            long l2 = System.currentTimeMillis();
            Document document = iterator.next();
            int n2 = ++n;
            boolean bl = false;
            semanticSpace.processDocument(document.reader());
            long l3 = System.currentTimeMillis();
            this.verbose("processed document #%d in %.3f seconds", n2, (double)(l3 - l2) / 1000.0);
        }
        this.verbose("Processed all %d documents in %.3f total seconds", n, (double)(System.currentTimeMillis() - l) / 1000.0);
    }

    protected void parseDocumentsMultiThreaded(final SemanticSpace semanticSpace, final Iterator<Document> iterator, int n) throws IOException, InterruptedException {
        LinkedList<1> linkedList = new LinkedList<1>();
        final AtomicInteger atomicInteger = new AtomicInteger(0);
        for (int i = 0; i < n; ++i) {
            Thread thread = new Thread(){

                @Override
                public void run() {
                    while (iterator.hasNext()) {
                        long l = System.currentTimeMillis();
                        Document document = (Document)iterator.next();
                        int n = atomicInteger.incrementAndGet();
                        boolean bl = false;
                        try {
                            semanticSpace.processDocument(document.reader());
                        }
                        catch (Throwable throwable) {
                            throwable.printStackTrace();
                        }
                        long l2 = System.currentTimeMillis();
                        GenericMain.this.verbose("parsed document #%d in %.3f seconds", n, (double)(l2 - l) / 1000.0);
                    }
                }
            };
            linkedList.add(thread);
        }
        long l = System.currentTimeMillis();
        for (Thread thread : linkedList) {
            thread.start();
        }
        this.verbose("Beginning processing using %d threads", n);
        for (Thread thread : linkedList) {
            thread.join();
        }
        this.verbose("Processed all %d documents in %.3f total seconds", atomicInteger.get(), (double)(System.currentTimeMillis() - l) / 1000.0);
    }

    protected static Set<String> loadValidTermSet(String string) throws IOException {
        HashSet<String> hashSet = new HashSet<String>();
        BufferedReader bufferedReader = new BufferedReader(new FileReader(string));
        String string2 = null;
        while ((string2 = bufferedReader.readLine()) != null) {
            hashSet.add(string2);
        }
        bufferedReader.close();
        return hashSet;
    }

    protected void verbose(String string) {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.logp(Level.FINE, this.getClass().getName(), "verbose", string);
        }
    }

    protected void verbose(String string, Object ... objectArray) {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.logp(Level.FINE, this.getClass().getName(), "verbose", String.format(string, objectArray));
        }
    }
}

