/*
Copyright 2012-2014 Samuel Gesche

This file is part of the Greek Reuse Toolkit.

The Greek Reuse Toolkit is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

The Greek Reuse Toolkit is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with the Greek Reuse Toolkit.  If not, see <http://www.gnu.org/licenses/>.
*/

package fr.cnrs.liris.drim.grt.modele;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import fr.cnrs.liris.drim.grt.proc.Texte;
import fr.cnrs.liris.drim.grt.proc.recherche.Outils;

/**
 * A mutable, ordered collection of {@link Passage} documents, with helpers
 * that aggregate text, characters and terms over all of them.
 *
 * @author sgesche
 */
public class Corpus {
    /** Documents of this corpus, kept in insertion order. */
    private final ArrayList<Passage> documents;

    /**
     * Creates an empty corpus.
     */
    public Corpus() {
        documents = new ArrayList<>();
    }

    /**
     * Appends a document at the end of this corpus.
     *
     * @param aAjouter the document to add
     */
    public void addDocument(Passage aAjouter) {
        documents.add(aAjouter);
    }

    /**
     * Removes the first occurrence of the given document from this corpus.
     * Does nothing when the document is not present.
     *
     * @param aEnlever the document to remove
     */
    public void removeDocument(Passage aEnlever) {
        documents.remove(aEnlever);
    }

    /**
     * Returns the documents of this corpus, in insertion order.
     *
     * @return a newly allocated array; modifying the array itself does not
     *         change the content of the corpus
     */
    public Passage[] getDocuments() {
        return documents.toArray(new Passage[0]);
    }

    /**
     * Returns the full text of every document, concatenated in insertion
     * order without any separator.
     *
     * @return the concatenated text, or an empty string for an empty corpus
     */
    public String getTexteComplet() {
        // StringBuilder avoids re-copying the accumulated text at each step.
        StringBuilder texte = new StringBuilder();
        for (Passage document : documents) {
            texte.append(document.getTexteComplet());
        }
        return texte.toString();
    }

    /**
     * Provides the list of the distinct characters used in this corpus, in
     * textual form.
     * <p>
     * Each character yields one line of the form
     * {@code <numeric code> - <character>} terminated by {@code '\n'}. Lines
     * follow the iteration order of a {@link HashSet}, which is unspecified.
     *
     * @return the textual list, or an empty string when there is no character
     */
    public String getListeCaracteres() {
        Set<Character> caracteres = new HashSet<>();
        for (Passage document : documents) {
            caracteres.addAll(document.listeCaracteresTotale());
        }
        StringBuilder liste = new StringBuilder();
        for (Character c : caracteres) {
            liste.append((int) c.charValue())
                 .append(" - ")
                 .append(c.toString())
                 .append("\n");
        }
        return liste.toString();
    }

    /**
     * Provides all the distinct terms of this corpus.
     * <p>
     * Terms are collected through a {@link HashSet}, so duplicates are
     * eliminated according to the terms' {@code equals}/{@code hashCode} and
     * the order of the returned array is unspecified.
     *
     * @return a newly allocated array holding each distinct term once
     */
    public Terme[] getListeTermes() {
        Set<Terme> termes = new HashSet<>();
        for (Passage document : documents) {
            termes.addAll(Arrays.asList(new Texte(document).getContenu()));
        }
        return termes.toArray(new Terme[0]);
    }
}
