/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.sspace.text.corpora;

import edu.ucla.sspace.text.DirectoryCorpusReader;
import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.DocumentPreprocessor;
import edu.ucla.sspace.text.StringDocument;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOError;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.Iterator;

/**
 * A {@link DirectoryCorpusReader} whose iterator scans each file line by line
 * for {@code <content>} elements and emits the text of each one as a
 * {@link StringDocument}.
 *
 * <p>Parsing is purely line- and substring-based (no XML parser is involved):
 * a line is classified by whether it contains the literal text
 * {@code <content>}, {@code </content>} or {@code <updated>}.
 */
public class BloglinesCorpusReader
extends DirectoryCorpusReader<Document> {
    /**
     * Creates a reader using the superclass's no-argument constructor.
     */
    public BloglinesCorpusReader() {
    }

    /**
     * Creates a reader, forwarding {@code documentPreprocessor} to the
     * superclass constructor.
     *
     * @param documentPreprocessor the preprocessor handed to the superclass
     */
    public BloglinesCorpusReader(DocumentPreprocessor documentPreprocessor) {
        super(documentPreprocessor);
    }

    /**
     * Returns a new {@link BloglinesIterator} over the supplied files.
     *
     * @param iterator the files to read documents from
     * @return an iterator over the documents found in those files
     */
    @Override
    protected Iterator<Document> corpusIterator(Iterator<File> iterator) {
        return new BloglinesIterator(iterator);
    }

    /**
     * Iterator that extracts documents from the file currently being read.
     *
     * <p>The underlying reader is closed when the end of a file is reached
     * and when the iterator switches to another file.  If iteration is
     * abandoned part-way through a file, the reader for that file stays open.
     */
    public class BloglinesIterator
    extends DirectoryCorpusReader.BaseFileIterator {
        /**
         * Reader over the file currently being scanned, or {@code null} when
         * no file is open.  Deliberately has no initializer: an initializer
         * would run after the superclass constructor and could discard a
         * reader installed during construction.
         */
        private BufferedReader bloglinesReader;

        /**
         * Creates an iterator over the given files.
         *
         * @param iterator the files to read documents from
         */
        public BloglinesIterator(Iterator<File> iterator) {
            super(iterator);
        }

        /**
         * Opens {@code file} for reading, first closing the reader of any
         * previously opened file so that its file handle is not leaked.
         *
         * @param file the file to start reading from
         * @throws IOError if the previous reader cannot be closed or the new
         *         file cannot be opened
         */
        @Override
        protected void setupCurrentDoc(File file) {
            this.closeCurrentReader();
            try {
                this.bloglinesReader = new BufferedReader(new FileReader(file));
            }
            catch (IOException iOException) {
                throw new IOError(iOException);
            }
        }

        /**
         * Reads forward in the current file until the next document is
         * complete and returns it.
         *
         * <ul>
         *   <li>A {@code <content>} line whose last {@code '<'} lies after the
         *       first {@code '>'} is treated as a complete single-line
         *       document; otherwise it starts a multi-line document.</li>
         *   <li>While a multi-line document is open, a {@code </content>} line
         *       ends it, and an {@code <updated>} line ends it with the
         *       timestamp (milliseconds, or {@code 0} when the element is
         *       empty) prefixed to the text.</li>
         *   <li>A {@code </content>} line seen while no multi-line document is
         *       open is ignored.</li>
         * </ul>
         *
         * @return the next document, or {@code null} when no file is open or
         *         the current file has no further documents; in the latter
         *         case the file's reader is closed
         * @throws IOError if reading from or closing the file fails
         */
        protected Document advanceInDoc() {
            if (this.bloglinesReader == null) {
                // Nothing is open (not yet set up, or already exhausted).
                return null;
            }
            // Non-null exactly while inside a multi-line <content> element.
            StringBuilder content = null;
            try {
                String line;
                while ((line = this.bloglinesReader.readLine()) != null) {
                    if (line.contains("<content>")) {
                        int start = line.indexOf(">") + 1;
                        int end = line.lastIndexOf("<");
                        if (end > start) {
                            // Opening and closing tag on the same line.
                            String text = this.cleanDoc(line.substring(start, end));
                            return new StringDocument(text);
                        }
                        // Starting a new element discards any unfinished one.
                        content = new StringBuilder(line.substring(start));
                        continue;
                    }
                    if (line.contains("</content>")) {
                        if (content == null) {
                            // Stray closing tag with no open element: skip it
                            // rather than fail with a NullPointerException.
                            continue;
                        }
                        content.append(line.substring(0, line.lastIndexOf("<")));
                        return new StringDocument(this.cleanDoc(content.toString()));
                    }
                    if (line.contains("<updated>") && content != null) {
                        int start = line.indexOf(">") + 1;
                        int end = line.lastIndexOf("<");
                        String stamp = line.substring(start, end);
                        long millis = stamp.isEmpty() ? 0L : Timestamp.valueOf(stamp).getTime();
                        String text = String.format("%d %s", millis, this.cleanDoc(content.toString()));
                        return new StringDocument(text);
                    }
                    if (content != null) {
                        // Body line of a multi-line element.
                        content.append(line);
                    }
                }
            }
            catch (IOException iOException) {
                throw new IOError(iOException);
            }
            // End of file: release the handle now instead of waiting for the
            // next setupCurrentDoc call, which may never come.
            this.closeCurrentReader();
            return null;
        }

        /**
         * Closes the current reader, if any, and clears the field so that a
         * later call cannot read from a closed stream.
         *
         * @throws IOError if closing the reader fails
         */
        private void closeCurrentReader() {
            BufferedReader reader = this.bloglinesReader;
            this.bloglinesReader = null;
            if (reader == null) {
                return;
            }
            try {
                reader.close();
            }
            catch (IOException iOException) {
                throw new IOError(iOException);
            }
        }
    }
}

