/*
Copyright 2012-2014 Samuel Gesche

This file is part of the Greek Reuse Toolkit.

The Greek Reuse Toolkit is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

The Greek Reuse Toolkit is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with the Greek Reuse Toolkit.  If not, see <http://www.gnu.org/licenses/>.
*/

package fr.cnrs.liris.drim.grt.proc;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import fr.cnrs.liris.drim.grt.modele.Coordonnee;
import fr.cnrs.liris.drim.grt.modele.Lemme;
import fr.cnrs.liris.drim.grt.modele.OrdreStrict;
import fr.cnrs.liris.drim.grt.modele.Passage;
import fr.cnrs.liris.drim.grt.modele.Terme;
import fr.cnrs.liris.drim.grt.modele.listes.LemmesRecurrents;
import fr.cnrs.liris.drim.grt.modele.listes.LemmesVides;

/**
 * Un Texte est une représentation d'un Passage qui n'a que son contenu, et pas sa structure.
 * L'usage du Texte à la place du Passage se justifie principalement quand on veut 
 * faire abstraction de la notion de sous-passage et traiter le contenu 
 * comme un tout.
 * @author sgesche
 */
public class Texte {
    private ArrayList<Terme> contenu;
    private Passage source;
    private Map<Terme, Integer> nbOccurrencesTermes;
    private Map<Terme, Integer> nbOccurrencesTermesNormalises;
    private Map<Lemme, Integer> nbOccurrencesLemmes;
    private ArrayList<Passage> listeVersets;
    private ArrayList<Passage> listeLignes;
    
    public Texte(Passage aPlaquer) {
        this.contenu = new ArrayList<>();
        this.source = aPlaquer;
        this.nbOccurrencesTermes = new HashMap<>();
        this.nbOccurrencesTermesNormalises = new HashMap<>();
        this.nbOccurrencesLemmes = new HashMap<>();
        this.listeVersets = new ArrayList<>();
        this.listeLignes = new ArrayList<>();
        ajouteSousContenu(aPlaquer);
    }
    
    public Texte(Terme[] dejaPlaque) {
        this.contenu = new ArrayList<>();
        this.source = null;
        this.nbOccurrencesTermes = new HashMap<>();
        this.nbOccurrencesTermesNormalises = new HashMap<>();
        this.nbOccurrencesLemmes = new HashMap<>();
        this.listeVersets = new ArrayList<>();
        this.listeLignes = new ArrayList<>();
        ajouteSousContenu(dejaPlaque);
    }
    
    private void ajouteSousContenu(Passage parent) {
        if(!parent.hasContenu()) {
            for(Coordonnee c: parent.getSousPassagesAvecCoordonnees().keySet()) {
                if(c.getSysteme().equals(OrdreStrict.getInstance(OrdreStrict.VERSETS))) {
                    listeVersets.add(parent.getSousPassagesAvecCoordonnees().get(c));
                }
                if(c.getSysteme().equals(OrdreStrict.getInstance(OrdreStrict.LIGNES))) {
                    listeLignes.add(parent.getSousPassagesAvecCoordonnees().get(c));
                }
            }
            for(Passage enfant: parent.getSousPassages()) {
                ajouteSousContenu(enfant);
            }
        } else {
            Terme t = parent.getContenu();
            contenu.add(t);
            nbOccurrencesTermes.put(t, 
                    new Integer(nbOccurrencesTermes.get(t) == null ? 
                    1 : nbOccurrencesTermes.get(t).intValue() + 1));
            nbOccurrencesTermesNormalises.put(t.getFormeNormale(), 
                    new Integer(nbOccurrencesTermesNormalises.get(t.getFormeNormale()) == null ? 
                    1 : nbOccurrencesTermesNormalises.get(t.getFormeNormale()).intValue() + 1));
            for(Lemme l: t.getFormesLemmatisees()) {
                nbOccurrencesLemmes.put(l, 
                        new Integer(nbOccurrencesLemmes.get(l) == null ? 
                        1 : nbOccurrencesLemmes.get(l).intValue() + 1));
            }
        }
    }
    
    private void ajouteSousContenu(Terme[] listeTermes) {
        for(Terme t: listeTermes) {
            contenu.add(t);
            nbOccurrencesTermes.put(t, 
                    new Integer(nbOccurrencesTermes.get(t) == null ? 
                    1 : nbOccurrencesTermes.get(t).intValue() + 1));
            nbOccurrencesTermesNormalises.put(t.getFormeNormale(), 
                    new Integer(nbOccurrencesTermesNormalises.get(t.getFormeNormale()) == null ? 
                    1 : nbOccurrencesTermesNormalises.get(t.getFormeNormale()).intValue() + 1));
            for(Lemme l: t.getFormesLemmatisees()) {
                nbOccurrencesLemmes.put(l, 
                        new Integer(nbOccurrencesLemmes.get(l) == null ? 
                        1 : nbOccurrencesLemmes.get(l).intValue() + 1));
            }
        }
    }
    
    public Passage getSource() {
        return source;
    }
    
    public Terme[] getContenu() {
        Terme[] res = new Terme[contenu.size()];
        contenu.toArray(res);
        return res;
    }
    
    public int getNbOccurrencesTerme(Terme t) {
        if(contientTerme(t)) {
            return nbOccurrencesTermes.get(t).intValue();
        } else {
            return 0;
        }
    }
    
    public int getNbOccurrencesTermeNormalise(Terme t) {
        if(contientTerme(t)) {
            return nbOccurrencesTermesNormalises.get(t).intValue();
        } else {
            return 0;
        }
    }
    
    public boolean contientTerme(Terme t) {
        return nbOccurrencesTermes.containsKey(t);
    }
    
    public int getNbOccurrencesLemme(Lemme l) {
        if(nbOccurrencesLemmes.containsKey(l)) {
            return nbOccurrencesLemmes.get(l).intValue();
        } else {
            return 0;
        }
    }
    
    public boolean contientUnLemmeDe(Terme t) {
        boolean reponse = false;
        for(Lemme l: t.getFormesLemmatisees()) {
            if(getNbOccurrencesLemme(l) > 0) {
                reponse = true;
                break;
            }
        }
        return reponse;
    }
    
    public Set<Terme> getTousTermes() {
        return nbOccurrencesTermes.keySet();
    }
    
    public Set<Terme> getTousTermesNormalises() {
        return nbOccurrencesTermesNormalises.keySet();
    }
    
    public Set<Lemme> getTousLemmes() {
        return nbOccurrencesLemmes.keySet();
    }
    
    public Terme[] getTermesLesPlusFrequents(int quantite, boolean avecMotsVides) {
        // Trier les termes par nombre d'occurrences
        Map<Integer, ArrayList<Terme>> listeOccurrences = new HashMap<>();
        for(Terme aClasser: nbOccurrencesTermes.keySet()) {
            if(listeOccurrences.get(nbOccurrencesTermes.get(aClasser)) == null) {
                listeOccurrences.put(nbOccurrencesTermes.get(aClasser), new ArrayList<Terme>());
            }
            listeOccurrences.get(nbOccurrencesTermes.get(aClasser)).add(aClasser);
        }
        Integer[] listeNbOccurrences = new Integer[listeOccurrences.keySet().size()];
        listeOccurrences.keySet().toArray(listeNbOccurrences);
        Arrays.sort(listeNbOccurrences);
        //System.err.println("Liste triée : " + Arrays.toString(listeNbOccurrences));
        
        // Sélectionner les <quantite> premiers (sauf, éventuellement, les mots vides
        ArrayList<Terme> resultat = new ArrayList<>();
        ArrayList<Integer> res2 = new ArrayList<>();
        for(int i=listeNbOccurrences.length-1; i>=0 && resultat.size() < quantite; i--) {
            for(Terme aAjouter: listeOccurrences.get(listeNbOccurrences[i])) {
                if(!aAjouter.estUnMotVide() || avecMotsVides) {
                    resultat.add(aAjouter);
                    res2.add(listeNbOccurrences[i]);
                }
            }
        }
        
        //System.err.println("Obtenu : "+resultat.size()+" termes les plus fréquents");
        Terme[] res = new Terme[Math.min(quantite, resultat.size())];
        int[] res3 = new int[Math.min(quantite, resultat.size())];
        for(int i=0; i<res.length; i++) {
            res[i] = resultat.get(i);
            res3[i] = res2.get(i);
        }
        //System.err.println(Arrays.toString(res));
        //System.err.println(Arrays.toString(res3));
        return res;
    }
    
    public Terme[] getTermesNormalisesLesPlusFrequents(int quantite, boolean avecMotsVides) {
        // Trier les termes par nombre d'occurrences
        Map<Integer, ArrayList<Terme>> listeOccurrences = new HashMap<>();
        for(Terme aClasser: nbOccurrencesTermesNormalises.keySet()) {
            if(listeOccurrences.get(nbOccurrencesTermesNormalises.get(aClasser)) == null) {
                listeOccurrences.put(nbOccurrencesTermesNormalises.get(aClasser), new ArrayList<Terme>());
            }
            listeOccurrences.get(nbOccurrencesTermesNormalises.get(aClasser)).add(aClasser);
        }
        Integer[] listeNbOccurrences = new Integer[listeOccurrences.keySet().size()];
        listeOccurrences.keySet().toArray(listeNbOccurrences);
        Arrays.sort(listeNbOccurrences);
        //System.err.println("Liste triée : " + Arrays.toString(listeNbOccurrences));
        
        // Sélectionner les <quantite> premiers (sauf, éventuellement, les mots vides
        ArrayList<Terme> resultat = new ArrayList<>();
        ArrayList<Integer> res2 = new ArrayList<>();
        for(int i=listeNbOccurrences.length-1; i>=0 && resultat.size() < quantite; i--) {
            for(Terme aAjouter: listeOccurrences.get(listeNbOccurrences[i])) {
                if(!aAjouter.estUnMotVide() || avecMotsVides) {
                    resultat.add(aAjouter);
                    res2.add(listeNbOccurrences[i]);
                }
            }
        }
        Terme[] res = new Terme[Math.min(quantite, resultat.size())];
        int[] res3 = new int[Math.min(quantite, resultat.size())];
        for(int i=0; i<res.length; i++) {
            res[i] = resultat.get(i);
            res3[i] = res2.get(i);
        }
        //System.err.println(Arrays.toString(res));
        //System.err.println(Arrays.toString(res3));
        return res;
    }
    
    public Lemme[] getLemmesLesPlusFrequents(int quantite, boolean avecLemmesVides) {
        // Trier les termes par nombre d'occurrences
        Map<Integer, ArrayList<Lemme>> listeOccurrences = new HashMap<>();
        for(Lemme aClasser: nbOccurrencesLemmes.keySet()) {
            if(listeOccurrences.get(nbOccurrencesLemmes.get(aClasser)) == null) {
                listeOccurrences.put(nbOccurrencesLemmes.get(aClasser), new ArrayList<Lemme>());
            }
            listeOccurrences.get(nbOccurrencesLemmes.get(aClasser)).add(aClasser);
        }
        Integer[] listeNbOccurrences = new Integer[listeOccurrences.keySet().size()];
        listeOccurrences.keySet().toArray(listeNbOccurrences);
        Arrays.sort(listeNbOccurrences);
        //System.err.println("Liste triée : " + Arrays.toString(listeNbOccurrences));
        
        // Sélectionner les <quantite> premiers (sauf, éventuellement, les mots vides
        ArrayList<Lemme> resultat = new ArrayList<>();
        ArrayList<Integer> res2 = new ArrayList<>();
        for(int i=listeNbOccurrences.length-1; i>=0 && resultat.size() < quantite; i--) {
            for(Lemme aAjouter: listeOccurrences.get(listeNbOccurrences[i])) {
                if(!aAjouter.estUnMotVide() || avecLemmesVides) {
                    resultat.add(aAjouter);
                    res2.add(listeNbOccurrences[i]);
                }
            }
        }
        Lemme[] res = new Lemme[Math.min(quantite, resultat.size())];
        int[] res3 = new int[Math.min(quantite, resultat.size())];
        for(int i=0; i<res.length; i++) {
            res[i] = resultat.get(i);
            res3[i] = res2.get(i);
        }
        //System.err.println(Arrays.toString(res));
        //System.err.println(Arrays.toString(res3));
        return res;
    }
    
    public Lemme[] getLemmesRecurrentsLesPlusFrequents(int quantite) {
        // Trier les termes par nombre d'occurrences
        Map<Integer, ArrayList<Lemme>> listeOccurrences = new HashMap<>();
        for(Lemme aClasser: nbOccurrencesLemmes.keySet()) {
            if(listeOccurrences.get(nbOccurrencesLemmes.get(aClasser)) == null) {
                listeOccurrences.put(nbOccurrencesLemmes.get(aClasser), new ArrayList<Lemme>());
            }
            listeOccurrences.get(nbOccurrencesLemmes.get(aClasser)).add(aClasser);
        }
        Integer[] listeNbOccurrences = new Integer[listeOccurrences.keySet().size()];
        listeOccurrences.keySet().toArray(listeNbOccurrences);
        Arrays.sort(listeNbOccurrences);
        //System.err.println("Liste triée : " + Arrays.toString(listeNbOccurrences));
        
        // Sélectionner les <quantite> premiers (sauf, éventuellement, les mots vides
        ArrayList<Lemme> resultat = new ArrayList<>();
        ArrayList<Integer> res2 = new ArrayList<>();
        for(int i=listeNbOccurrences.length-1; i>=0 && resultat.size() < quantite; i--) {
            for(Lemme aAjouter: listeOccurrences.get(listeNbOccurrences[i])) {
                if(LemmesRecurrents.contientTerme(aAjouter)) {
                    resultat.add(aAjouter);
                    res2.add(listeNbOccurrences[i]);
                }
            }
        }
        Lemme[] res = new Lemme[Math.min(quantite, resultat.size())];
        int[] res3 = new int[Math.min(quantite, resultat.size())];
        for(int i=0; i<res.length; i++) {
            res[i] = resultat.get(i);
            res3[i] = res2.get(i);
        }
        //System.err.println(Arrays.toString(res));
        //System.err.println(Arrays.toString(res3));
        return res;
    }
    
    public Map<String, Integer> getSuitesDeLemmesLesPlusFrequentes(int tailleSuite, int quantite, boolean filtrerMotsVides) {
        // Lister les suites de termes
        ArrayList<Terme> aConsulter0 = new ArrayList<>(contenu);
        ArrayList<Terme> aConsulter = new ArrayList<>();
        if(filtrerMotsVides) {
            for(Terme t: aConsulter0) {
                if(!LemmesVides.contientTerme(t)) {
                    aConsulter.add(t);
                }
            }
        }
        Map<String, Integer> nbOccurrencesSuites = new HashMap<>();
        for(int i=0; i<aConsulter.size()-(tailleSuite-1); i++) {
            String s = aConsulter.get(i).getLemmePrincipal().getExpression();
            for(int j=1; j<tailleSuite; j++) {
                s += " "+aConsulter.get(i+j).getLemmePrincipal().getExpression();
            }
            if(nbOccurrencesSuites.containsKey(s)) {
                nbOccurrencesSuites.put(s, nbOccurrencesSuites.get(s)+1);
            } else {
                nbOccurrencesSuites.put(s, 1);
            }
        }
        // Trier les termes par nombre d'occurrences
        Map<Integer, ArrayList<String>> listeOccurrences = new HashMap<>();
        for(String aClasser: nbOccurrencesSuites.keySet()) {
            if(listeOccurrences.get(nbOccurrencesSuites.get(aClasser)) == null) {
                listeOccurrences.put(nbOccurrencesSuites.get(aClasser), new ArrayList<String>());
            }
            listeOccurrences.get(nbOccurrencesSuites.get(aClasser)).add(aClasser);
        }
        Integer[] listeNbOccurrences = new Integer[listeOccurrences.keySet().size()];
        listeOccurrences.keySet().toArray(listeNbOccurrences);
        Arrays.sort(listeNbOccurrences);
        
        // Sélectionner les <quantite> premiers (sauf, éventuellement, les mots vides
        ArrayList<String> resultat = new ArrayList<>();
        for(int i=listeNbOccurrences.length-1; i>=0 && resultat.size() < quantite; i--) {
            for(String aAjouter: listeOccurrences.get(listeNbOccurrences[i])) {
                resultat.add(aAjouter);
            }
        }
        
        String[] res = new String[Math.min(quantite, resultat.size())];
        for(int i=0; i<res.length; i++) {
            res[i] = resultat.get(i);
        }
        
        Map<String, Integer> res2 = new HashMap<>();
        for(String s: res) {
            res2.put(s, nbOccurrencesSuites.get(s));
        }
        return res2;
    }
    
    public Passage[] getVersets() {
        Passage[] res = new Passage[listeVersets.size()];
        listeVersets.toArray(res);
        return res;
    }
    
    public Passage[] getLignes() {
        Passage[] res = new Passage[listeLignes.size()];
        listeLignes.toArray(res);
        return res;
    }
    
    public Passage[] getPhrases(boolean avecMotsVides) {
        if(source == null) {
            return new Passage[]{};
        }
        return avecMotsVides?source.getPhrases():source.getPhrasesSansMotsVides();
    }
    
    public Passage[] getBlocsMots(int nbMotsParBloc, boolean avecMotsVides) {
        if(source == null) {
            return new Passage[]{};
        }
        ArrayList<Passage> blocs = new ArrayList<>();
        Passage[] mots = (avecMotsVides ? source.getAllMots() : source.getAllMotsNonVides());
        for(int i=0; i<mots.length; i+=nbMotsParBloc) {
            ArrayList<Passage> motsDuBloc = new ArrayList<>();
            for(int j=i; j<i+nbMotsParBloc && j<mots.length; j++) {
                motsDuBloc.add(mots[j]);
            }
            Passage[] motsDuBloc2 = new Passage[motsDuBloc.size()];
            motsDuBloc.toArray(motsDuBloc2);
            Passage bloc = Passage.creeCollectionDePassages(motsDuBloc2);
            blocs.add(bloc);
        }
        Passage[] blocs2 = new Passage[blocs.size()];
        blocs.toArray(blocs2);
        return blocs2;
    }
    
    public String getTexte() {
        ArrayList<Character> res = new ArrayList<>();
        for(Terme t: contenu) {
            char[] liste = t.getExpression().toCharArray();
            for(char c: liste) {
                res.add(new Character(c));
            }
            res.add(' ');
        }
        char[] res0 = new char[res.size()];
        for(int i=0; i<res.size(); i++) {
            res0[i] = res.get(i).charValue();
        }
        String s = new String(res0);
        return s;
    }
    
    @Override
    public String toString() {
        return getTexte();
    }
}
