/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.sspace.tools;

import edu.ucla.sspace.common.ArgOptions;
import edu.ucla.sspace.text.IteratorFactory;
import edu.ucla.sspace.util.LoggerUtil;
import edu.ucla.sspace.util.TrieMap;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Counts adjacent token pairs (bigrams) in token streams and prints each
 * bigram together with an association score computed from a 2x2 contingency
 * table.
 *
 * <p>Each distinct token is assigned a dense integer index the first time it
 * is seen.  A bigram is identified by a {@code long} whose upper 32 bits hold
 * the index of the first token and whose lower 32 bits hold the index of the
 * second token.
 *
 * <p>Instances keep mutable counters and use no synchronization.
 */
public class BigramExtractor {
    private static final Logger LOGGER = Logger.getLogger(BigramExtractor.class.getName());

    /** Per-token statistics, keyed by the token text. */
    private final Map<String, TokenStats> tokenCounts = new TrieMap<TokenStats>();

    /**
     * Occurrence count for each bigram key.  The value is boxed as the
     * smallest of {@code Byte}, {@code Short} or {@code Integer} that holds
     * the count.
     */
    private final Map<Long, Number> bigramCounts;

    /** Index that will be handed to the next previously unseen token. */
    private int tokenIndexCounter;

    /** Total number of bigram occurrences recorded so far. */
    private int numBigramsInCorpus;

    /**
     * Creates an extractor whose bigram table has an initial capacity of 1000.
     */
    public BigramExtractor() {
        this(1000);
    }

    /**
     * Creates an extractor with the given initial capacity for the bigram
     * table.
     *
     * @param expectedNumBigrams initial capacity passed to the backing
     *        {@code HashMap}
     */
    public BigramExtractor(int expectedNumBigrams) {
        this.bigramCounts = new HashMap<Long, Number>(expectedNumBigrams);
        this.tokenIndexCounter = 0;
        this.numBigramsInCorpus = 0;
    }

    /**
     * Returns {@code true} if the token must not take part in any bigram.
     * Only the empty string is excluded.
     */
    private boolean excludeToken(String token) {
        return token.isEmpty();
    }

    /**
     * Tokenizes the text with {@code IteratorFactory.tokenizeOrdered} and
     * counts the bigrams of the resulting token stream.
     *
     * @param text the text to process
     */
    public void process(String text) {
        process(IteratorFactory.tokenizeOrdered(text));
    }

    /**
     * Tokenizes the reader's content with
     * {@code IteratorFactory.tokenizeOrdered} and counts the bigrams of the
     * resulting token stream.
     *
     * @param text the reader supplying the text to process
     */
    public void process(BufferedReader text) {
        process(IteratorFactory.tokenizeOrdered(text));
    }

    /**
     * Counts every pair of adjacent tokens produced by the iterator.  A pair
     * is skipped when either of its tokens is the empty string; the tokens on
     * both sides of an empty token are therefore never paired with each other.
     *
     * @param text the ordered tokens to process
     */
    public void process(Iterator<String> text) {
        if (!text.hasNext()) {
            return;
        }
        String current = text.next();
        while (text.hasNext()) {
            String previous = current;
            current = text.next();
            if (excludeToken(previous) || excludeToken(current)) {
                continue;
            }
            processBigram(previous, current);
        }
    }

    /**
     * Records one occurrence of the bigram {@code (left, right)}, updating
     * the per-token statistics, the corpus total and the bigram count.
     */
    private void processBigram(String left, String right) {
        TokenStats leftStats = getStatsFor(left);
        TokenStats rightStats = getStatsFor(right);
        // A token in the middle of a stream is counted once per bigram it
        // takes part in, i.e. once as a right token and once as a left token.
        ++leftStats.count;
        ++rightStats.count;
        ++leftStats.leftCount;
        ++rightStats.rightCount;
        ++numBigramsInCorpus;

        // Pack both token indices into one key: first token in the high word,
        // second token in the low word.
        Long key = Long.valueOf(((long) leftStats.index << 32) | (rightStats.index & 0xFFFFFFFFL));
        Number previous = bigramCounts.get(key);
        int updated = (previous == null) ? 1 : 1 + previous.intValue();
        bigramCounts.put(key, compactCount(updated));
    }

    /**
     * Boxes the count using the narrowest of {@code Byte}, {@code Short} and
     * {@code Integer} that can represent it.
     */
    private static Number compactCount(int count) {
        if (count <= Byte.MAX_VALUE) {
            return Byte.valueOf((byte) count);
        }
        if (count <= Short.MAX_VALUE) {
            return Short.valueOf((short) count);
        }
        return Integer.valueOf(count);
    }

    /**
     * Returns the statistics object for the token, creating it and assigning
     * the next free index if the token has not been seen before.
     */
    private TokenStats getStatsFor(String token) {
        TokenStats stats = tokenCounts.get(token);
        if (stats == null) {
            stats = new TokenStats(tokenIndexCounter++);
            tokenCounts.put(token, stats);
        }
        return stats;
    }

    /**
     * Writes one line per bigram in the form
     * {@code <score> <firstToken> <secondToken>} and then flushes the writer.
     * The writer is not closed.
     *
     * @param output the writer that receives the report
     * @param test the significance test used to score each bigram
     * @param minOccurrencePerToken bigrams are skipped when the {@code count}
     *        of either token is below this value
     * @throws Error if {@code test} has no scoring implementation
     */
    public void printBigrams(PrintWriter output, SignificanceTest test, int minOccurrencePerToken) {
        // Invert the token -> stats mapping so that the indices packed into
        // each bigram key can be resolved without a map lookup per bigram.
        int numTokens = tokenCounts.size();
        String[] indexToToken = new String[numTokens];
        TokenStats[] indexToStats = new TokenStats[numTokens];
        for (Map.Entry<String, TokenStats> entry : tokenCounts.entrySet()) {
            TokenStats stats = entry.getValue();
            indexToToken[stats.index] = entry.getKey().toString();
            indexToStats[stats.index] = stats;
        }

        LOGGER.info("Number of bigrams: " + bigramCounts.size());
        for (Map.Entry<Long, Number> entry : bigramCounts.entrySet()) {
            long key = entry.getKey().longValue();
            int firstIndex = (int) (key >>> 32);
            int secondIndex = (int) (key & 0xFFFFFFFFL);
            int bigramCount = entry.getValue().intValue();

            TokenStats first = indexToStats[firstIndex];
            TokenStats second = indexToStats[secondIndex];
            if (first.count < minOccurrencePerToken || second.count < minOccurrencePerToken) {
                continue;
            }
            int[] table = getContingencyTable(first, second, bigramCount);
            double score = getScore(table, test);
            output.println(score + " " + indexToToken[firstIndex] + " " + indexToToken[secondIndex]);
        }
        // PrintWriter buffers its output; push it out so that the report does
        // not depend on the caller remembering to flush.
        output.flush();
    }

    /**
     * Scores a contingency table with the requested test.
     *
     * @throws Error if the test is not implemented
     */
    private double getScore(int[] contingencyTable, SignificanceTest test) {
        switch (test) {
            case PMI:
                return pmi(contingencyTable);
            case CHI_SQUARED:
                return chiSq(contingencyTable);
            case LOG_LIKELIHOOD:
                return logLikelihood(contingencyTable);
            default:
                throw new Error(test + " not implemented yet");
        }
    }

    /**
     * Returns the ratio {@code P(bigram) / (P(first) * P(second))}, where each
     * probability is a count divided by the total number of bigrams.
     *
     * <p>NOTE(review): the logarithm of this ratio is not taken, so the value
     * is the exponential of the usual pointwise mutual information.  The
     * ordering of bigrams by score is the same either way.
     */
    private double pmi(int[] contingencyTable) {
        int[] t = contingencyTable;
        double total = numBigramsInCorpus;
        double probOfBigram = t[0] / total;
        double probOfFirstToken = (t[0] + t[2]) / total;
        double probOfSecondToken = (t[0] + t[1]) / total;
        return probOfBigram / (probOfFirstToken * probOfSecondToken);
    }

    /**
     * Returns Pearson's chi-squared statistic for the table: the sum over the
     * four cells of {@code (observed - expected)^2 / expected}.  A cell whose
     * expected value is zero contributes nothing, which avoids a
     * {@code 0 / 0} result.
     */
    private double chiSq(int[] contingencyTable) {
        double[] expected = expectedCounts(contingencyTable);
        double score = 0;
        for (int i = 0; i < expected.length; ++i) {
            if (expected[i] == 0) {
                continue;
            }
            double difference = contingencyTable[i] - expected[i];
            score += difference * difference / expected[i];
        }
        return score;
    }

    /**
     * Returns the log-likelihood ratio (G) statistic for the table:
     * {@code 2 * sum(observed * ln(observed / expected))}.  Cells with an
     * observed count of zero contribute nothing, following the convention
     * {@code 0 * ln(0) = 0}.
     */
    private double logLikelihood(int[] contingencyTable) {
        double[] expected = expectedCounts(contingencyTable);
        double sum = 0;
        for (int i = 0; i < expected.length; ++i) {
            int observed = contingencyTable[i];
            if (observed == 0 || expected[i] == 0) {
                continue;
            }
            sum += observed * Math.log(observed / expected[i]);
        }
        return 2.0 * sum;
    }

    /**
     * Returns the expected value of each cell under independence, in the same
     * cell order as {@link #getContingencyTable}.
     */
    private static double[] expectedCounts(int[] t) {
        double firstPresent = (double) t[0] + t[2];
        double firstAbsent = (double) t[1] + t[3];
        double secondPresent = (double) t[0] + t[1];
        double secondAbsent = (double) t[2] + t[3];
        double total = secondPresent + secondAbsent;
        return new double[] {
            secondPresent / total * firstPresent,
            secondPresent / total * firstAbsent,
            secondAbsent / total * firstPresent,
            secondAbsent / total * firstAbsent
        };
    }

    /**
     * Builds the 2x2 contingency table for a bigram as a four-element array:
     * <ol>
     *   <li>{@code [0]} occurrences of the bigram itself</li>
     *   <li>{@code [1]} bigrams ending in the second token but not starting
     *       with the first</li>
     *   <li>{@code [2]} bigrams starting with the first token but not ending
     *       in the second</li>
     *   <li>{@code [3]} all remaining bigrams</li>
     * </ol>
     */
    private int[] getContingencyTable(TokenStats first, TokenStats second, int bigramCount) {
        int both = bigramCount;
        int secondOnly = second.rightCount - both;
        int firstOnly = first.leftCount - both;
        int neither = numBigramsInCorpus - (secondOnly + firstOnly + both);
        return new int[] {both, secondOnly, firstOnly, neither};
    }

    /**
     * Command-line entry point.  Positional arguments are the output file,
     * the name of a {@link SignificanceTest} (case-insensitive) and one or
     * more input files.  Every line of every input file is processed
     * separately, so no bigram spans a line break.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        ArgOptions options = new ArgOptions();
        options.addOption('F', "tokenFilter", "filters to apply to the input token stream", true, "FILTER_SPEC", "Tokenizing Options");
        options.addOption('M', "minFreq", "minimum frequency of the reported bigrams", true, "INT", "Bigram Options");
        options.addOption('v', "verbose", "Print verbose output about counting status", false, null, "Program Options");
        options.parseOptions(args);
        if (options.numPositionalArgs() < 3) {
            // Arrays.toString lists the constant names; concatenating the
            // array directly would print its identity hash instead.
            System.out.println("usage: java BigramExtractor [options] <OutputFile> <SignificanceTest> <InputFile> [<InputFile>...]\n significance test options: " + Arrays.toString(SignificanceTest.values()) + "\n" + options.prettyPrint());
            return;
        }
        if (options.hasOption("verbose")) {
            LoggerUtil.setLevel(Level.FINE);
        }
        Properties properties = System.getProperties();
        if (options.hasOption("tokenFilter")) {
            properties.setProperty("edu.ucla.sspace.text.TokenizerFactory.tokenFilter", options.getStringOption("tokenFilter"));
        }
        IteratorFactory.setProperties(properties);
        try {
            BigramExtractor extractor = new BigramExtractor(1000000);
            // Locale.ROOT keeps the conversion independent of the default
            // locale (e.g. the Turkish dotted capital I).
            String testName = options.getPositionalArg(1).toUpperCase(Locale.ROOT);
            SignificanceTest test = SignificanceTest.valueOf(testName);
            // Parse the threshold before any input is read so that a bad
            // value is reported without first processing the whole corpus.
            int minFreq = options.hasOption("minFreq") ? options.getIntOption("minFreq") : 0;

            PrintWriter output = new PrintWriter(options.getPositionalArg(0));
            try {
                int numArgs = options.numPositionalArgs();
                for (int i = 2; i < numArgs; ++i) {
                    String inputFile = options.getPositionalArg(i);
                    BufferedReader reader = new BufferedReader(new FileReader(inputFile));
                    try {
                        int lineNumber = 0;
                        String line;
                        while ((line = reader.readLine()) != null) {
                            extractor.process(line);
                            if (++lineNumber % 10000 == 0) {
                                LOGGER.fine(inputFile + ": processed document " + lineNumber);
                            }
                        }
                    } finally {
                        reader.close();
                    }
                }
                extractor.printBigrams(output, test, minFreq);
                // PrintWriter swallows I/O errors; surface them explicitly.
                if (output.checkError()) {
                    LOGGER.severe("An I/O error occurred while writing " + options.getPositionalArg(0));
                }
            } finally {
                // Closing flushes the buffered report to disk.
                output.close();
            }
        }
        catch (Exception exception) {
            exception.printStackTrace();
        }
    }

    /**
     * Mutable occurrence statistics for a single token.
     */
    private static class TokenStats {
        /** Dense index assigned to the token when it was first seen. */
        public final int index;
        /** Number of bigram occurrences the token took part in, on either side. */
        public int count;
        /** Number of bigram occurrences in which the token was the first element. */
        public int leftCount;
        /** Number of bigram occurrences in which the token was the second element. */
        public int rightCount;

        public TokenStats(int index) {
            this.index = index;
            this.count = 0;
            this.leftCount = 0;
            this.rightCount = 0;
        }
    }

    /**
     * The significance tests that can be requested.  Only {@code CHI_SQUARED},
     * {@code PMI} and {@code LOG_LIKELIHOOD} have a scoring implementation;
     * requesting any other constant makes {@code printBigrams} throw an
     * {@code Error}.
     */
    public enum SignificanceTest {
        CHI_SQUARED,
        FISHERS_EXACT,
        BARNARDS,
        PMI,
        LOG_LIKELIHOOD;

    }
}

