/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.sspace.purandare;

import edu.ucla.sspace.clustering.Assignments;
import edu.ucla.sspace.clustering.ClutoClustering;
import edu.ucla.sspace.common.SemanticSpace;
import edu.ucla.sspace.common.Statistics;
import edu.ucla.sspace.matrix.AtomicGrowingMatrix;
import edu.ucla.sspace.matrix.AtomicMatrix;
import edu.ucla.sspace.matrix.Matrix;
import edu.ucla.sspace.matrix.SparseMatrix;
import edu.ucla.sspace.matrix.SparseRowMaskedMatrix;
import edu.ucla.sspace.matrix.YaleSparseMatrix;
import edu.ucla.sspace.text.IteratorFactory;
import edu.ucla.sspace.util.SparseHashArray;
import edu.ucla.sspace.util.WorkerThread;
import edu.ucla.sspace.vector.CompactSparseVector;
import edu.ucla.sspace.vector.DoubleVector;
import edu.ucla.sspace.vector.VectorMath;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOError;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.BitSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

public class PurandareFirstOrder
implements SemanticSpace {
    /** Logger for this class, named after the fully qualified class name. */
    private static final Logger LOGGER = Logger.getLogger(PurandareFirstOrder.class.getName());
    /** Common prefix of the property names defined by this class. */
    private static final String PROPERTY_PREFIX = "edu.ucla.sspace.purandare.PurandareFirstOrder";
    /**
     * Name of the property that caps how many contexts are kept per word.
     * The constructor requires its value, when present, to be a positive integer.
     */
    public static final String MAX_CONTEXTS_PER_WORD = "edu.ucla.sspace.purandare.PurandareFirstOrder.maxContexts";
    /** Maps each term to the integer index assigned to it by {@code getIndexFor}. */
    private final Map<String, Integer> termToIndex;
    /** Maps each term (or sense label) to its vector; filled in by {@code senseInduce}. */
    private final Map<String, DoubleVector> termToVector;
    /** Number of tokens kept on each side of the focus word in {@code processDocument}; set to 5. */
    private final int windowSize;
    /** Number of tokens on each side of an occurrence examined by {@code processIntDocument}; set to 20. */
    private final int contextWindowSize;
    /** Co-occurrence counts, indexed by (focus term index, neighbouring term index). */
    private final AtomicMatrix cooccurrenceMatrix = new AtomicGrowingMatrix();
    /** Per-term occurrence counters, positioned by term index. */
    private final List<AtomicInteger> termCounts;
    /** Temporary file holding every processed document encoded as term indices. */
    private File compressedDocuments;
    /** Writer for {@link #compressedDocuments}; also used as the lock guarding writes to it. */
    private DataOutputStream compressedDocumentsWriter;
    /** Number of documents seen; used as the record count when the temporary file is re-read. */
    private final AtomicInteger documentCounter;
    /** Upper bound on the contexts kept per word; {@code Integer.MAX_VALUE} when the property is unset. */
    private final int maxContextsPerWord;
    /** Next term index to hand out; only modified while holding this instance's monitor. */
    private int wordIndexCounter;

    /**
     * Creates a new instance configured from the JVM's system properties.
     */
    public PurandareFirstOrder() {
        this(System.getProperties());
    }

    /**
     * Creates a new instance configured from the supplied properties.
     *
     * <p>Only {@link #MAX_CONTEXTS_PER_WORD} is read.  When it is absent the
     * number of contexts per word is unbounded ({@code Integer.MAX_VALUE}).
     *
     * <p>A temporary file is created to hold the index-encoded documents.  It
     * is registered for deletion when the JVM exits so that it does not
     * accumulate in the temporary directory.
     *
     * @param properties the configuration to read
     * @throws IllegalArgumentException if the configured maximum is not a
     *         positive integer ({@code NumberFormatException} when it is not
     *         an integer at all)
     * @throws IOError if the temporary file cannot be created or opened
     */
    public PurandareFirstOrder(Properties properties) {
        this.termToIndex = new ConcurrentHashMap<String, Integer>();
        this.termToVector = new ConcurrentHashMap<String, DoubleVector>();
        this.termCounts = new CopyOnWriteArrayList<AtomicInteger>();
        this.windowSize = 5;
        this.contextWindowSize = 20;
        this.documentCounter = new AtomicInteger(0);

        String maxContextsProp = properties.getProperty(MAX_CONTEXTS_PER_WORD);
        if (maxContextsProp == null) {
            this.maxContextsPerWord = Integer.MAX_VALUE;
        } else {
            // Tolerate surrounding whitespace, which is common in property files.
            int maxContexts = Integer.parseInt(maxContextsProp.trim());
            if (maxContexts <= 0) {
                throw new IllegalArgumentException("The number of contexts must be a positive number");
            }
            this.maxContextsPerWord = maxContexts;
        }

        try {
            this.compressedDocuments = File.createTempFile("petersen-documents", ".dat");
            // The file is only an intermediate artefact; do not leave it behind.
            this.compressedDocuments.deleteOnExit();
            this.compressedDocumentsWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(this.compressedDocuments)));
        }
        catch (IOException ioe) {
            throw new IOError(ioe);
        }
    }

    /**
     * Tokenizes one document, updates the term and co-occurrence counts, and
     * appends an index-encoded copy of the document to the temporary file.
     *
     * <p>Each non-empty token is replaced by its term index; empty-string
     * tokens are written as {@code -1} and contribute to no counts.  For every
     * non-empty focus token the co-occurrence matrix is incremented once for
     * each non-empty token among the {@code windowSize} following and the
     * {@code windowSize} preceding tokens.
     *
     * <p>The encoded record is written as: total token count, number of
     * non-empty tokens, then one {@code int} per token.  The document counter
     * is advanced only after the record has been written, under the same lock,
     * so that it always equals the number of records in the file even if
     * processing of a document fails part-way.
     *
     * @param bufferedReader the source of the document's text
     * @throws IOException if the encoded document cannot be written
     */
    @Override
    public void processDocument(BufferedReader bufferedReader) throws IOException {
        Iterator<String> tokens = IteratorFactory.tokenizeOrdered(bufferedReader);
        ArrayDeque<String> nextWords = new ArrayDeque<String>();
        ArrayDeque<String> prevWords = new ArrayDeque<String>();

        // The document is encoded in memory first so that the shared file is
        // only locked for a single bulk write.
        ByteArrayOutputStream encoded = new ByteArrayOutputStream(4096);
        DataOutputStream encodedWriter = new DataOutputStream(encoded);
        int tokenCount = 0;
        int indexedTokenCount = 0;

        // Prime the look-ahead window.
        for (int i = 0; i < this.windowSize && tokens.hasNext(); ++i) {
            nextWords.offer(tokens.next());
        }

        while (!nextWords.isEmpty()) {
            ++tokenCount;
            String focus = nextWords.remove();
            if (tokens.hasNext()) {
                nextWords.offer(tokens.next());
            }

            if (focus.equals("")) {
                // Placeholder token: keep its position but record no counts.
                encodedWriter.writeInt(-1);
            } else {
                int focusIndex = this.getIndexFor(focus);
                encodedWriter.writeInt(focusIndex);
                this.termCounts.get(focusIndex).incrementAndGet();
                ++indexedTokenCount;
                this.countCooccurrences(focusIndex, nextWords);
                this.countCooccurrences(focusIndex, prevWords);
            }

            // Slide the look-behind window, keeping at most windowSize tokens.
            prevWords.offer(focus);
            if (prevWords.size() > this.windowSize) {
                prevWords.remove();
            }
        }
        encodedWriter.close();

        byte[] documentBytes = encoded.toByteArray();
        synchronized (this.compressedDocumentsWriter) {
            this.compressedDocumentsWriter.writeInt(tokenCount);
            this.compressedDocumentsWriter.writeInt(indexedTokenCount);
            this.compressedDocumentsWriter.write(documentBytes, 0, documentBytes.length);
            this.documentCounter.getAndIncrement();
        }
    }

    /**
     * Adds one co-occurrence between the focus term and every non-empty token
     * in the given window.
     *
     * @param focusIndex the index of the focus term (matrix row)
     * @param window the neighbouring tokens to count
     */
    private void countCooccurrences(int focusIndex, Iterable<String> window) {
        for (String neighbour : window) {
            if (neighbour.equals("")) {
                continue;
            }
            int neighbourIndex = this.getIndexFor(neighbour);
            this.cooccurrenceMatrix.addAndGet(focusIndex, neighbourIndex, 1.0);
        }
    }

    /**
     * Returns the index of the given term, assigning the next free index if
     * the term has not been seen before.
     *
     * <p>The map is consulted once without locking; on a miss it is consulted
     * again while holding this instance's monitor before a new index is
     * assigned.  The term's counter is appended to {@code termCounts} before
     * the index is published in {@code termToIndex}.
     *
     * @param string the term to look up
     * @return the term's index
     */
    private final int getIndexFor(String string) {
        Integer known = this.termToIndex.get(string);
        if (known != null) {
            return known.intValue();
        }
        synchronized (this) {
            // Another thread may have registered the term in the meantime.
            Integer registered = this.termToIndex.get(string);
            if (registered != null) {
                return registered.intValue();
            }
            int assigned = this.wordIndexCounter;
            this.wordIndexCounter = assigned + 1;
            this.termCounts.add(new AtomicInteger(0));
            this.termToIndex.put(string, Integer.valueOf(assigned));
            return assigned;
        }
    }

    /**
     * Returns a read-only view of the keys of the term-to-vector map.
     *
     * @return an unmodifiable set of the words that currently have a vector
     */
    @Override
    public Set<String> getWords() {
        Set<String> words = this.termToVector.keySet();
        return Collections.unmodifiableSet(words);
    }

    /**
     * Looks up the vector stored for the given word.
     *
     * @param string the word (or sense label) to look up
     * @return the stored vector, or {@code null} if the map has no entry for it
     */
    @Override
    public DoubleVector getVector(String string) {
        DoubleVector vector = this.termToVector.get(string);
        return vector;
    }

    /**
     * Runs the post-processing step by delegating to the no-argument
     * {@code processSpace()}.
     *
     * @param properties not read by this method
     * @throws IOError wrapping any {@code IOException} raised by the delegate
     */
    @Override
    public void processSpace(Properties properties) {
        try {
            this.processSpace();
        }
        catch (IOException iOException) {
            // The interface method declares no checked exception, so rethrow unchecked.
            throw new IOError(iOException);
        }
    }

    /**
     * Closes the document file, selects the feature set of every term, and
     * then re-reads the corpus once per term to build and cluster its
     * contexts.
     *
     * <p>Per-term work is queued for a pool of {@code WorkerThread}s, one per
     * available processor.  A semaphore is released once for every finished
     * term (successfully or not) and this method blocks until all terms have
     * been accounted for.  An {@code IOException} while processing a single
     * term is logged and that term is skipped; the remaining terms continue.
     *
     * @throws IOException if the document file cannot be closed
     * @throws Error if the calling thread is interrupted while waiting; the
     *         thread's interrupt status is restored before throwing
     */
    private void processSpace() throws IOException {
        this.compressedDocumentsWriter.close();

        // Invert the term-to-index mapping so terms can be found by index.
        final String[] indexToTerm = new String[this.termToIndex.size()];
        for (Map.Entry<String, Integer> entry : this.termToIndex.entrySet()) {
            indexToTerm[entry.getValue().intValue()] = entry.getKey();
        }

        // Sum of all term occurrence counts.
        int corpusSize = 0;
        for (AtomicInteger count : this.termCounts) {
            corpusSize += count.get();
        }

        final int numTerms = this.cooccurrenceMatrix.rows();
        LOGGER.info("calculating term features");
        final BitSet[] termFeatures = new BitSet[this.wordIndexCounter];
        for (int i = 0; i < numTerms; ++i) {
            termFeatures[i] = this.calculateTermFeatures(indexToTerm[i], corpusSize);
        }

        LOGGER.info("reprocessing corpus to generate feature vectors");
        LinkedBlockingQueue<Runnable> workQueue = new LinkedBlockingQueue<Runnable>();
        int numThreads = Runtime.getRuntime().availableProcessors();
        for (int i = 0; i < numThreads; ++i) {
            WorkerThread worker = new WorkerThread(workQueue);
            worker.start();
        }

        final Semaphore termsProcessed = new Semaphore(0);
        for (int i = 0; i < numTerms; ++i) {
            final String term = indexToTerm[i];
            final int termIndex = i;
            workQueue.offer(new Runnable() {
                @Override
                public void run() {
                    try {
                        LOGGER.fine(String.format("processing term %6d/%d: %s", termIndex, numTerms, term));
                        Matrix contexts = PurandareFirstOrder.this.getTermContexts(termIndex, termFeatures[termIndex]);
                        PurandareFirstOrder.this.senseInduce(term, contexts);
                    }
                    catch (IOException ioe) {
                        // Report through the logger rather than stderr; the
                        // term is skipped and the other terms still proceed.
                        LOGGER.log(Level.SEVERE, "could not process contexts for term " + term, ioe);
                    }
                    finally {
                        // Always signal completion so the waiter cannot hang.
                        termsProcessed.release();
                    }
                }
            });
        }

        try {
            termsProcessed.acquire(numTerms);
        }
        catch (InterruptedException ie) {
            // Preserve the interrupt for code further up the stack.
            Thread.currentThread().interrupt();
            throw new Error("interrupted while waiting for terms to finish reprocessing", ie);
        }
        LOGGER.info("finished reprocessing all terms");
    }

    /**
     * Selects the features of a term: the columns of its co-occurrence row
     * whose log-likelihood score exceeds 3.841.
     *
     * <p>For each column with a non-zero co-occurrence count a 2x2 table is
     * built from that count, the two terms' occurrence counts and the total
     * {@code n}, and passed to {@code logLikelihood}.
     *
     * @param string the term whose features are computed
     * @param n the sum of all term occurrence counts
     * @return a bit set with one bit set per selected feature index
     */
    private BitSet calculateTermFeatures(String string, int n) {
        final int termIndex = this.termToIndex.get(string);
        LOGGER.fine(String.format("Calculating feature set for %6d/%d: %s", termIndex, this.cooccurrenceMatrix.rows(), string));
        final DoubleVector cooccurrences = this.cooccurrenceMatrix.getRowVector(termIndex);
        final int termCount = this.termCounts.get(termIndex).get();
        final BitSet features = new BitSet(this.wordIndexCounter);

        // NOTE(review): 3.841 looks like the chi-square critical value for
        // p = 0.05 with one degree of freedom -- confirm.
        final double threshold = 3.841;

        for (int other = 0; other < cooccurrences.length(); ++other) {
            final double both = cooccurrences.get(other);
            if (both == 0.0) {
                continue;
            }
            final double otherOnly = (double)this.termCounts.get(other).get() - both;
            final double termOnly = (double)termCount - both;
            final double neither = (double)n - (both + otherOnly + termOnly);
            final double score = PurandareFirstOrder.logLikelihood(both, otherOnly, termOnly, neither);
            // A NaN score fails this comparison and is therefore not selected.
            if (score > threshold) {
                features.set(other);
            }
        }

        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine(string + " had " + features.cardinality() + " features");
        }
        return features;
    }

    /**
     * Re-reads every encoded document and builds the context matrix of one
     * term: one row per occurrence of the term, one column per term index.
     *
     * <p>If a finite context limit is configured and the matrix has more rows
     * than that limit, the result is wrapped in a row mask built from
     * {@code Statistics.randomDistribution}.
     *
     * <p>The document stream is closed even when reading fails.
     *
     * @param n the index of the term whose contexts are collected
     * @param bitSet the feature indices to record in each context
     * @return the (possibly row-masked) context matrix
     * @throws IOException if the document file cannot be opened or read
     */
    private Matrix getTermContexts(int n, BitSet bitSet) throws IOException {
        int numDocuments = this.documentCounter.get();
        SparseMatrix contexts = new YaleSparseMatrix(this.termCounts.get(n).get(), this.termToIndex.size());

        DataInputStream documents = new DataInputStream(
            new BufferedInputStream(new FileInputStream(this.compressedDocuments)));
        try {
            int contextsSeen = 0;
            for (int doc = 0; doc < numDocuments; ++doc) {
                int tokenCount = documents.readInt();
                // The second header field (non-empty token count) is not
                // needed here but must be consumed to reach the tokens.
                documents.readInt();
                int[] tokens = new int[tokenCount];
                for (int t = 0; t < tokenCount; ++t) {
                    tokens[t] = documents.readInt();
                }
                contextsSeen += this.processIntDocument(n, tokens, contexts, contextsSeen, bitSet);
            }
        }
        finally {
            documents.close();
        }

        if (this.maxContextsPerWord < Integer.MAX_VALUE && contexts.rows() > this.maxContextsPerWord) {
            BitSet keptRows = Statistics.randomDistribution(this.maxContextsPerWord, contexts.rows());
            contexts = new SparseRowMaskedMatrix(contexts, keptRows);
        }
        return contexts;
    }

    /**
     * Builds the vector(s) for a term from its context matrix.
     *
     * <p>Terms that are not purely ASCII-alphabetic, or that have fewer than
     * seven contexts, get a single vector equal to the sum of all context
     * rows.  All other terms have their contexts clustered into seven groups;
     * every cluster holding more than 2% of the contexts becomes a sense whose
     * vector is the sum of the cluster's rows.  The first retained sense is
     * stored under the term itself and later ones under {@code term-1},
     * {@code term-2}, and so on.
     *
     * @param string the term being processed
     * @param matrix the term's contexts, one per row
     * @throws IOException declared for the clustering call
     */
    private void senseInduce(String string, Matrix matrix) throws IOException {
        LOGGER.fine("Clustering " + matrix.rows() + " contexts for " + string);
        int numClusters = Math.min(7, matrix.rows());

        // "[a-zA-Z]+" rather than "[a-zA-z]+": the range A-z also covers the
        // punctuation characters [ \ ] ^ _ ` that sit between 'Z' and 'a'.
        if (!string.matches("[a-zA-Z]+") || numClusters <= 6) {
            CompactSparseVector summed = new CompactSparseVector(this.termToIndex.size());
            int rows = matrix.rows();
            for (int row = 0; row < rows; ++row) {
                VectorMath.add(summed, matrix.getRowVector(row));
            }
            this.termToVector.put(string, summed);
            return;
        }

        Assignments assignments = new ClutoClustering().cluster(matrix, numClusters, ClutoClustering.Method.AGGLOMERATIVE, ClutoClustering.Criterion.UPGMA);
        LOGGER.fine("Generative sense vectors for " + string);

        int[] clusterSizes = new int[numClusters];
        CompactSparseVector[] senseVectors = new CompactSparseVector[numClusters];
        for (int c = 0; c < senseVectors.length; ++c) {
            senseVectors[c] = new CompactSparseVector(this.termToIndex.size());
        }

        // Accumulate each context row into the first cluster it was assigned to.
        for (int row = 0; row < assignments.size(); ++row) {
            int[] rowClusters = assignments.get(row).assignments();
            if (rowClusters.length == 0) {
                continue;
            }
            int cluster = rowClusters[0];
            clusterSizes[cluster] = clusterSizes[cluster] + 1;
            DoubleVector context = matrix.getRowVector(row);
            VectorMath.add(senseVectors[cluster], context);
        }

        // Keep only clusters that account for more than 2% of the contexts.
        int senses = 0;
        for (int c = 0; c < numClusters; ++c) {
            int size = clusterSizes[c];
            if (!((double)size / (double)matrix.rows() > 0.02)) {
                continue;
            }
            String senseName = senses == 0 ? string : string + "-" + senses;
            ++senses;
            this.termToVector.put(senseName, senseVectors[c]);
        }
        LOGGER.fine("Discovered " + senses + " senses for " + string);
    }

    /**
     * Scans one index-encoded document for occurrences of a term and writes
     * one context row per occurrence into the matrix.
     *
     * <p>For each occurrence, the tokens within {@code contextWindowSize}
     * positions before and after it are examined.  Negative tokens and tokens
     * whose index is not set in {@code bitSet} are ignored; the rest are
     * counted, and each count is stored at (row, token index).  The window is
     * symmetric: the exclusive end bound is {@code i + contextWindowSize + 1},
     * so as many tokens are taken after the occurrence as before it.
     *
     * @param n the index of the term whose occurrences are sought
     * @param nArray the document as term indices
     * @param matrix the matrix receiving the context rows
     * @param n2 the row at which this document's first context is written
     * @param bitSet the feature indices to record
     * @return the number of occurrences found, i.e. rows written
     */
    private int processIntDocument(int n, int[] nArray, Matrix matrix, int n2, BitSet bitSet) {
        int contexts = 0;
        for (int i = 0; i < nArray.length; ++i) {
            if (nArray[i] != n) {
                continue;
            }

            SparseHashArray<Integer> featureCounts = new SparseHashArray<Integer>();
            int start = Math.max(i - this.contextWindowSize, 0);
            int end = Math.min(i + this.contextWindowSize + 1, nArray.length);
            for (int pos = start; pos < end; ++pos) {
                if (pos == i) {
                    // The occurrence itself is not part of its own context.
                    continue;
                }
                int token = nArray[pos];
                if (token < 0 || !bitSet.get(token)) {
                    continue;
                }
                Integer seen = featureCounts.get(token);
                featureCounts.set(token, seen == null ? 1 : seen + 1);
            }

            int row = n2 + contexts;
            for (int feature : featureCounts.getElementIndices()) {
                matrix.set(row, feature, featureCounts.get(feature).intValue());
            }
            ++contexts;
        }
        return contexts;
    }

    /**
     * Returns the length of the vectors in this space, which is the number of
     * entries in the term-to-index map.
     *
     * @return the current number of indexed terms
     */
    @Override
    public int getVectorLength() {
        final int dimensions = this.termToIndex.size();
        return dimensions;
    }

    /**
     * Returns the fixed name of this semantic space.
     *
     * @return the string {@code "purandare-petersen"}
     */
    @Override
    public String getSpaceName() {
        return "purandare-petersen";
    }

    /**
     * Computes the log-likelihood statistic of a 2x2 table of counts.
     *
     * <p>With the cells laid out as
     * <pre>
     *     d   d2
     *     d3  d4
     * </pre>
     * each cell's expected value is {@code rowTotal / grandTotal * columnTotal}
     * and the result is {@code 2 * sum(observed * ln(observed / expected))}.
     * Cells that are exactly zero contribute nothing to the sum.
     *
     * @param d the first-row, first-column count
     * @param d2 the first-row, second-column count
     * @param d3 the second-row, first-column count
     * @param d4 the second-row, second-column count
     * @return the log-likelihood score
     */
    private static double logLikelihood(double d, double d2, double d3, double d4) {
        final double firstColumn = d + d3;
        final double secondColumn = d2 + d4;
        final double firstRow = d + d2;
        final double secondRow = d3 + d4;
        final double total = firstRow + secondRow;

        final double firstRowShare = firstRow / total;
        final double secondRowShare = secondRow / total;

        double sum = logLikelihoodTerm(d, firstRowShare * firstColumn);
        sum += logLikelihoodTerm(d2, firstRowShare * secondColumn);
        sum += logLikelihoodTerm(d3, secondRowShare * firstColumn);
        sum += logLikelihoodTerm(d4, secondRowShare * secondColumn);
        return 2.0 * sum;
    }

    /**
     * Returns one cell's contribution, {@code observed * ln(observed / expected)},
     * or zero when the observed count is exactly zero.
     */
    private static double logLikelihoodTerm(double observed, double expected) {
        if (observed == 0.0) {
            return 0.0;
        }
        return observed * Math.log(observed / expected);
    }
}

