/*
Copyright 2012-2014 Samuel Gesche

This file is part of the Greek Reuse Toolkit.

The Greek Reuse Toolkit is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

The Greek Reuse Toolkit is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with the Greek Reuse Toolkit.  If not, see <http://www.gnu.org/licenses/>.
*/

package fr.cnrs.liris.drim.grt.modele.convertisseurs;

import org.apache.commons.lang.StringUtils;

/**
 * Normalises the Unicode encoding of polytonic Greek text so that the same
 * letter is always represented by the same code point.
 * <p>
 * The conversion replaces glyph variants by the regular letter, maps accented
 * letters of the basic Greek block onto their Greek Extended counterparts,
 * drops the short/long vowel marks, and merges a stand-alone diacritic sign
 * followed by a letter into the single precomposed character.
 *
 * @author sgesche
 */
public class ConvertisseurTexteUnicodeVersUnicode {

    /**
     * Stand-alone breathing signs, indexed by the offset of the matching
     * precomposed letter inside its run of eight consecutive code points
     * (for instance 7936..7943 for alpha).
     */
    private static final String[][] MARQUES_ESPRITS = {
        {"’", "᾽", "᾿"}, // +0: smooth breathing (three look-alike signs)
        {"‘", "῾"},      // +1: rough breathing (two look-alike signs)
        {"῍"},           // +2: smooth breathing and grave
        {"῝"},           // +3: rough breathing and grave
        {"῎"},           // +4: smooth breathing and acute
        {"῞"},           // +5: rough breathing and acute
        {"῏"},           // +6: smooth breathing and circumflex
        {"῟"},           // +7: rough breathing and circumflex
    };

    /** Offsets used by letters that take every breathing/accent combination. */
    private static final int[] ESPRITS_COMPLETS = {0, 1, 2, 3, 4, 5, 6, 7};

    /** Offsets used by epsilon and omicron, which have no circumflex forms. */
    private static final int[] ESPRITS_SANS_CIRCONFLEXE = {0, 1, 2, 3, 4, 5};

    /** Offsets used by capital upsilon, which only has rough-breathing forms. */
    private static final int[] ESPRITS_RUDES = {1, 3, 5, 7};

    /**
     * Converts a Greek text to its normalised Unicode form.
     *
     * @param s         the text to convert; may be {@code null}
     * @param sansLatin when {@code true}, ASCII letters and digits are removed
     *                  from the text before the conversion
     * @return the converted text, or {@code null} when {@code s} is {@code null}
     */
    public static String conversion(String s, boolean sansLatin) {
        if (s == null) {
            // Same result the non-Latin-stripping path always produced for null.
            return null;
        }
        String s0 = sansLatin ? nettoieLatin(s) : s;

        // All those oddly encoded characters that show up at random in the texts.

        // Glyph variants.
        s0 = remplace(s0, "ϐ", 946); // curled beta -> beta
        s0 = remplace(s0, "ϕ", 966); // phi symbol  -> phi

        // Code variants.
        // NOTE(review): the two targets below are iota forms (7986, 7987) while
        // the sources are capital etas; kept as-is, intent to be confirmed.
        s0 = remplace(s0, "Ἠ", 7986);
        s0 = remplace(s0, "Ἡ", 7987);
        // Capital letters with tonos (902..911) and the dialytika-only letters
        // (938, 939, 970, 971) are left unchanged.
        s0 = remplaceCode(s0, 912, 8147); // iota with dialytika and tonos
        s0 = remplaceCode(s0, 940, 8049); // alpha with tonos
        s0 = remplaceCode(s0, 941, 8051); // epsilon with tonos
        s0 = remplaceCode(s0, 942, 8053); // eta with tonos
        s0 = remplaceCode(s0, 943, 8055); // iota with tonos
        s0 = remplaceCode(s0, 944, 8163); // upsilon with dialytika and tonos
        s0 = remplaceCode(s0, 972, 8057); // omicron with tonos
        s0 = remplaceCode(s0, 973, 8059); // upsilon with tonos
        s0 = remplaceCode(s0, 974, 8061); // omega with tonos

        // Marks we have no use for here (this may not hold elsewhere): the
        // short/long vowel variants are reduced to the bare letter. This must
        // happen before the merging step so that the bare letter can still be
        // combined with a preceding diacritic sign.
        s0 = remplaceCode(s0, 8112, 945);
        s0 = remplaceCode(s0, 8113, 945);
        s0 = remplaceCode(s0, 8120, 913);
        s0 = remplaceCode(s0, 8121, 913);
        s0 = remplaceCode(s0, 8144, 953);
        s0 = remplaceCode(s0, 8145, 953);
        s0 = remplaceCode(s0, 8152, 921);
        s0 = remplaceCode(s0, 8153, 921);
        s0 = remplaceCode(s0, 8160, 965);
        s0 = remplaceCode(s0, 8161, 965);
        s0 = remplaceCode(s0, 8168, 933);
        s0 = remplaceCode(s0, 8169, 933);

        // Diacritic sign and letter stored as two separate characters:
        // breathings (alone or with an accent) first, lower case then capitals.
        s0 = composeEsprits(s0, "α", 7936, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "ᾳ", 8064, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "ε", 7952, ESPRITS_SANS_CIRCONFLEXE);
        s0 = composeEsprits(s0, "η", 7968, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "ῃ", 8080, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "ι", 7984, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "ο", 8000, ESPRITS_SANS_CIRCONFLEXE);
        s0 = composeEsprits(s0, "υ", 8016, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "ω", 8032, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "ῳ", 8096, ESPRITS_COMPLETS);

        s0 = composeEsprits(s0, "Α", 7944, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "Ε", 7960, ESPRITS_SANS_CIRCONFLEXE);
        s0 = composeEsprits(s0, "Η", 7976, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "Ι", 7992, ESPRITS_COMPLETS);
        s0 = composeEsprits(s0, "Ο", 8008, ESPRITS_SANS_CIRCONFLEXE);
        // Rough breathing only: 8025, 8027, 8029 and 8031.
        s0 = composeEsprits(s0, "Υ", 8024, ESPRITS_RUDES);
        s0 = composeEsprits(s0, "Ω", 8040, ESPRITS_COMPLETS);

        // Rho only takes the rough breathing.
        s0 = remplace(s0, "‘ρ", 8165);
        s0 = remplace(s0, "῾ρ", 8165);
        s0 = remplace(s0, "‘Ρ", 8172);
        s0 = remplace(s0, "῾Ρ", 8172);

        // Accent without breathing: grave, acute, circumflex and, for iota
        // and upsilon, the two dialytika combinations.
        s0 = remplace(s0, "`α", 8048);
        s0 = remplace(s0, "´α", 8049);
        s0 = remplace(s0, "῀α", 8118);
        s0 = remplace(s0, "`ᾳ", 8114);
        s0 = remplace(s0, "´ᾳ", 8116);
        s0 = remplace(s0, "῀ᾳ", 8119);

        s0 = remplace(s0, "`ε", 8050);
        s0 = remplace(s0, "´ε", 8051);

        s0 = remplace(s0, "`η", 8052);
        s0 = remplace(s0, "´η", 8053);
        s0 = remplace(s0, "῀η", 8134);
        s0 = remplace(s0, "`ῃ", 8130);
        s0 = remplace(s0, "´ῃ", 8132);
        s0 = remplace(s0, "῀ῃ", 8135);

        s0 = remplace(s0, "`ι", 8054);
        s0 = remplace(s0, "´ι", 8055);
        s0 = remplace(s0, "῀ι", 8150);
        s0 = remplace(s0, "῭ι", 8146);
        s0 = remplace(s0, "΅ι", 8147);

        s0 = remplace(s0, "`ο", 8056);
        s0 = remplace(s0, "´ο", 8057);

        s0 = remplace(s0, "`υ", 8058);
        s0 = remplace(s0, "´υ", 8059);
        s0 = remplace(s0, "῀υ", 8166);
        s0 = remplace(s0, "῭υ", 8162);
        s0 = remplace(s0, "΅υ", 8163);

        s0 = remplace(s0, "`ω", 8060);
        s0 = remplace(s0, "´ω", 8061);
        s0 = remplace(s0, "῀ω", 8182);
        s0 = remplace(s0, "`ῳ", 8178);
        s0 = remplace(s0, "´ῳ", 8180);
        s0 = remplace(s0, "῀ῳ", 8183);

        return s0;
    }

    /**
     * Merges every "breathing sign + letter" pair into the precomposed letter.
     *
     * @param texte       the text to process
     * @param base        the bare letter that follows the sign
     * @param premierCode code point of the first slot (offset 0) of the
     *                    letter's run of precomposed forms
     * @param decalages   offsets, within that run, that exist for this letter
     * @return the text with the pairs replaced
     */
    private static String composeEsprits(String texte, String base, int premierCode, int[] decalages) {
        String resultat = texte;
        for (int decalage : decalages) {
            for (String marque : MARQUES_ESPRITS[decalage]) {
                resultat = remplace(resultat, marque + base, premierCode + decalage);
            }
        }
        return resultat;
    }

    /**
     * Replaces every occurrence of a literal sequence by a single character.
     *
     * @param texte the text to process
     * @param cible the literal sequence to look for
     * @param code  code point of the replacement character
     * @return the text with the occurrences replaced
     */
    private static String remplace(String texte, String cible, int code) {
        if (texte.indexOf(cible) < 0) {
            // Most sequences are absent from a given text: skip the copy.
            return texte;
        }
        return texte.replace(cible, String.valueOf((char) code));
    }

    /**
     * Replaces every occurrence of one character by another.
     *
     * @param texte  the text to process
     * @param source code point of the character to look for
     * @param cible  code point of the replacement character
     * @return the text with the occurrences replaced
     */
    private static String remplaceCode(String texte, int source, int cible) {
        return texte.replace((char) source, (char) cible);
    }

    /**
     * Removes ASCII letters and digits from a text.
     *
     * @param expression the text to clean
     * @return the text without the characters A-Z, a-z and 0-9
     */
    private static String nettoieLatin(String expression) {
        return expression.replaceAll("[A-Za-z0-9]", "");
    }
}
