Changeset 91362 in spip-zone


Ignore:
Timestamp:
Aug 15, 2015, 7:49:02 AM (4 years ago)
Author:
cedric@…
Message:

eviter un plantage si pas de fonctions mb_truc et PHPDoc

Location:
_plugins_/nospam
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • _plugins_/nospam/inc/detecter_langue.php

    r86784 r91362  
    6767$GLOBALS["ngrams"]['zu'] = array("oku","la ","nga"," ng","a n"," ku","a k","thi"," uk","ezi","e n","uku","le ","lo ","hi ","wa "," no","a u","ela","we ","a i","ni ","ele","zin","uth","ama","elo","pha","ing","aba","ath","and","enz","eth","esi","ma ","lel"," um"," ka","the","ung","nge","ngo","tho","nye","kwe","eni","izi","ye "," kw","ndl","ho ","a e","na ","zi ","het","kan","e u","e i","und","ise","isi","nda","kha","ba ","i k","nom","fun"," ez"," iz","ke ","ben","o e","isa","zwe","kel","ka ","aka","nzi","o n","e k","oma","kwa"," ne","any","ang","hla","i u","mth","kub","o k","ana","ane","ikh","ebe","kut","ha "," is","azi","ulu","seb","ala","onk","ban","i e","azw","wen"," ab","han","a a","i n","imi","lan","hat","lwa"," na","ini","akh","li ","ngu","nke","nok","ume","eke","elw","yo ","aph","kus"," es"," ok","iph"," im","mel","i i"," lo"," in"," am","kho","za ","gok","sek","lun","kun","lwe","sha","sik","kuf","hak","a y","thu","sa ","o u","khu","ayo","hul","e a","ali","eng","lu ","ne "," ko","eli","uba","dle","e e","ith"," yo","a l","nel","mis"," si","kul","a o","sis","lok","gen","o z","i a","emi","uma","eka","alo","man","isw","tha","o i","lon","so ","uph","uhl","ntu","zim","mal","ind","wez"," ba","o o"," yi"," we","ula","phe","o y","ile","o l","wo ","wel","ga ","tu ","hle","okw","fan"," le","kaz","ase","ani","nde","bo ","ngi","ule"," em","men","iny","amb","mbi","gan","ifu","o s","ant","hel","ika","ona","i l","fut"," fu","ze ","u a","nhl","nin"," zo","end","sig","u k","gab","ufa","ish","ush","kuz","no ","gam","kuh"," ye","nya","nez","zis","dlu","kat","dla","tsh"," se","ike","kuq","gu ","osi","swa","lul"," zi","ima","e l","kup","mo ","nza","asi","ko ","kum","lek","she","umt","uny","yok","wan","wam","ame","ong","lis","mkh","ahl","ale","use","o a","alu","gap","si ","hlo","nje","omt","o w","okh","he ","kom","i s");
    6868
     69/**
     70 * Lister les ngrams contenus dans un texte
     71 * @param string $string
     72 * @param int $ng_number
     73 * @param int $ng_max_chars
     74 * @return array
     75 */
    6976function createNGrams($string, $ng_number=300, $ng_max_chars=4) {
    7077
     
    98105}
    99106
     107
     108/**
     109 * Trouver la langue la/les langues plausibles en fonction des ngrams trouves dans le texte et des ngrams possibles
     110 * @param array $sub_ng
     111 *   jeu de Ngrams trouves dans le texte
     112 * @param array $lm_ng
     113 *   jeu de Ngrams possibles
     114 * @param int $max_delta
     115 * @return string
     116 */
    100117function compareNGrams($sub_ng, $lm_ng, $max_delta = 140000) {
    101118        foreach ($lm_ng as $lm_basename => $language) {
     
    128145}
    129146
     147/**
     148 * Retourne la proportion de texte qui n'est pas dans la plage utf cherchee
     149 * @param string $texte
     150 * @param string $plage
     151 * @return float
     152 */
    130153function tester_plage_utf($texte, $plage) {
    131154
     
    140163}
    141164
     165/**
     166 * Reduire le jeu de langue possibles en fonction des plages utf8 utilisees dans le texte
     167 * @param string $texte
     168 * @return array|bool|string
     169 */
    142170function detecter_plages_utf($texte) {
    143171
     
    178206
    179207
    180 // Detecter langue
    181 
    182 // Premiere passe: detecter dans quel alphabet le texte est écrit
    183 // ce qui permet de limiter le nombre de réponse
    184 // (par exemple: des caractères «arabes» ne peuvent être que de l'arabe, du farsi, du pachtourne ou du urdu)
    185 // Deuxieme passe: faire un test classique sur les trigrams (uniquement sur les langues possibles de la première passe)
    186 
     208/**
     209 * Detecter langue
     210 *
     211 * Premiere passe: detecter dans quel alphabet le texte est écrit
     212 * ce qui permet de limiter le nombre de réponse
     213 * (par exemple: des caractères «arabes» ne peuvent être que de l'arabe, du farsi, du pachtourne ou du urdu)
     214 * Deuxieme passe: faire un test classique sur les trigrams (uniquement sur les langues possibles de la première passe)
     215 *
     216 * @param string $texte
     217 * @return bool|string
     218 */
    187219function _detecter_langue($texte) {
    188220        $texte = strip_tags($texte);
    189221        $texte = str_replace("’", "'", $texte);
    190222        $texte = str_replace("\"", " ", $texte);
    191        
    192         // Si texte trop court, impossible de détecter la langue
    193         if (mb_strlen($texte, "utf-8") < 6 ) return false;
     223
     224        // Si les fonctions mb_ ne sont pas disponibles
     225        // ou si texte trop court, impossible de détecter la langue
     226        if (!function_exists('mb_strlen')
     227          OR !function_exists('mb_substr')
     228          OR mb_strlen($texte, "utf-8") < 6){
     229                return false;
     230        }
    194231
    195232        $possibles = detecter_plages_utf($texte);
    196        
    197         if (!$possibles)
    198                 return;
     233
     234        if (!$possibles) {
     235                return false;
     236        }
    199237        else if (!is_array($possibles)) {
    200238                return $possibles;
     
    202240                $ngrams = array();
    203241                foreach($possibles as $lang) {
    204                         $ngrams["$lang"] = $GLOBALS["ngrams"]["$lang"];
     242                        $ngrams[$lang] = $GLOBALS["ngrams"][$lang];
    205243                }
    206244        }
    207        
     245
    208246        $sub_ng = createNGrams($texte);
    209 
    210247        $result_array = compareNGrams($sub_ng, $ngrams, 140000);
    211        
     248
    212249//      print_r($result_array);
    213250       
    214         $resultat = false;
    215         if ($result_array) {
    216                 foreach($result_array as $lang => $val) {
    217                         if (!$resultat) $resultat = $lang;
     251        $lang = false;
     252        if (is_array($result_array) AND count($result_array)) {
     253                while(!$lang AND $r = each($result_array)){
     254                        list($lang,$val) = $r;
    218255                }
    219256        }
    220         return $resultat;
     257        return $lang;
    221258}
    222259
  • _plugins_/nospam/paquet.xml

    r90670 r91362  
     2  2   prefix="nospam"
     3  3   categorie="performance"
     4    - version="1.5.12"
        4 + version="1.5.13"
     5  5   etat="stable"
     6  6   compatibilite="[2.0.0;3.1.*]"
  • _plugins_/nospam/plugin.xml

    r90670 r91362  
    10 10   <icon>images/nospam-32.png</icon>
    11 11   <licence>(c) 2008 GPL</licence>
    12    - <version>1.5.12</version>
       12 + <version>1.5.13</version>
    13 13   <etat>stable</etat>
    14 14   <description><multi>
Note: See TracChangeset for help on using the changeset viewer.