/*
 * Decompiled with CFR 0.152.
 */
package edu.ucla.sspace.text.corpora;

import edu.ucla.sspace.text.DirectoryCorpusReader;
import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.DocumentPreprocessor;
import edu.ucla.sspace.text.StringDocument;
import java.io.File;
import java.io.IOError;
import java.io.IOException;
import java.util.Iterator;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * A {@link DirectoryCorpusReader} that parses XML files and turns the text of
 * their {@code <u>} (utterance) elements into {@link Document} instances.  The
 * text of an utterance is assembled from its descendant {@code <w>} elements.
 */
public class ChildesCorpusReader
extends DirectoryCorpusReader<Document> {
    /**
     * Creates a reader using the superclass's no-argument configuration.
     */
    public ChildesCorpusReader() {
    }

    /**
     * Creates a reader that hands the given preprocessor to the superclass.
     *
     * @param documentPreprocessor the preprocessor forwarded to
     *        {@link DirectoryCorpusReader}
     */
    public ChildesCorpusReader(DocumentPreprocessor documentPreprocessor) {
        super(documentPreprocessor);
    }

    /**
     * Returns an iterator that produces documents from the supplied files.
     *
     * @param iterator the files to be parsed
     * @return a new {@link ChildesFileIterator} over {@code iterator}
     */
    @Override
    protected Iterator<Document> corpusIterator(Iterator<File> iterator) {
        return new ChildesFileIterator(iterator);
    }

    /**
     * Iterates over the utterances of each XML file, parsing one file at a
     * time with a single shared {@link DocumentBuilder}.
     */
    public class ChildesFileIterator
    extends DirectoryCorpusReader.BaseFileIterator {
        /** The {@code <u>} elements of the current file; null if none was parsed. */
        private NodeList utterances;
        /** Index into {@link #utterances} of the next utterance to emit. */
        private int currentNodeIndex;
        /**
         * When true each utterance becomes its own document; otherwise all
         * utterances of a file are joined into a single document.
         */
        private boolean oneUtterancePerDoc;
        /** Parser reused for every file handled by this iterator. */
        private final DocumentBuilder db;

        /**
         * Creates an iterator over the given files.
         *
         * @param iterator the files to be parsed
         * @throws IllegalStateException if no XML parser can be configured
         */
        public ChildesFileIterator(Iterator<File> iterator) {
            super(iterator);
            this.oneUtterancePerDoc = true;
            // NOTE(review): the factory is used with its default settings; if
            // corpus files can come from untrusted sources, consider disabling
            // DTDs / external entities here.
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            try {
                this.db = factory.newDocumentBuilder();
            }
            catch (ParserConfigurationException e) {
                // Fail fast with the real cause instead of storing a null
                // parser that would only surface later as a
                // NullPointerException while parsing the first file.
                throw new IllegalStateException("Unable to create an XML document builder", e);
            }
        }

        /**
         * Parses {@code file} and positions this iterator at its first
         * utterance.  A file that is not well-formed XML is reported on
         * standard error and treated as containing no utterances.
         *
         * @param file the XML file to parse
         * @throws IOError if the file cannot be read
         */
        @Override
        protected void setupCurrentDoc(File file) {
            // Drop the previous file's state up front so that a failed parse
            // can never leave stale utterances behind.
            this.utterances = null;
            this.currentNodeIndex = 0;
            try {
                org.w3c.dom.Document parsed = this.db.parse(file);
                this.utterances = parsed.getElementsByTagName("u");
            }
            catch (SAXException e) {
                // Best effort: report the malformed file and carry on with no
                // utterances for it.
                System.err.println("Skipping unparsable XML file: " + file);
                e.printStackTrace();
            }
            catch (IOException e) {
                throw new IOError(e);
            }
        }

        /**
         * Returns the next document of the current file.
         *
         * @return the next document, or {@code null} when the current file has
         *         no utterances left or could not be parsed
         */
        protected Document advanceInDoc() {
            if (this.utterances == null
                    || this.currentNodeIndex >= this.utterances.getLength()) {
                return null;
            }
            StringBuilder text = new StringBuilder();
            if (this.oneUtterancePerDoc) {
                Element utterance = (Element)this.utterances.item(this.currentNodeIndex++);
                this.addTextFromUtterance(utterance, text);
            } else {
                int total = this.utterances.getLength();
                for (int i = 0; i < total; ++i) {
                    this.addTextFromUtterance((Element)this.utterances.item(i), text);
                    text.append(". ");
                }
                // Mark the whole file as consumed; without this the same
                // document would be returned again on every call.
                this.currentNodeIndex = total;
            }
            return new StringDocument(text.toString());
        }

        /**
         * Appends the leading text of every {@code <w>} descendant of
         * {@code element} to {@code stringBuilder}, each followed by a space.
         * Words whose first child is missing or carries no node value are
         * skipped.
         *
         * @param element the utterance element to read words from
         * @param stringBuilder the buffer receiving the words
         */
        private void addTextFromUtterance(Element element, StringBuilder stringBuilder) {
            NodeList words = element.getElementsByTagName("w");
            for (int i = 0; i < words.getLength(); ++i) {
                org.w3c.dom.Node firstChild = words.item(i).getFirstChild();
                if (firstChild == null) {
                    // Empty <w/> element: nothing to append.
                    continue;
                }
                String token = firstChild.getNodeValue();
                if (token == null) {
                    // First child is not a text node; appending it would
                    // insert the literal string "null".
                    continue;
                }
                stringBuilder.append(token).append(" ");
            }
        }
    }
}

