Changeset 7599 in spip-zone


Ignore:
Timestamp:
Nov 27, 2006, 3:16:40 PM (12 years ago)
Author:
real3t@…
Message:

Mise à jour en 1.3.0 de la library.

Location:
_plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library
Files:
12 added
8 edited

Legend:

Unmodified
Added
Removed
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier.php

    r7584 r7599  
    2323
    2424/*
    25     HTML Purifier 1.2.0 - Standards Compliant HTML Filtering
     25    HTML Purifier 1.3.0 - Standards Compliant HTML Filtering
    2626    Copyright (C) 2006 Edward Z. Yang
    2727
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php

    r7584 r7599  
    2525);
    2626
    27 HTMLPurifier_ConfigSchema::Define(
     27HTMLPurifier_ConfigSchema::define(
    2828    'URI', 'DisableExternal', false, 'bool',
    2929    'Disables links to external websites.  This is a highly effective '.
     
    3535);
    3636
     37HTMLPurifier_ConfigSchema::define(
     38    'URI', 'DisableExternalResources', false, 'bool',
     39    'Disables the embedding of external resources, preventing users from '.
     40    'embedding things like images from other hosts. This prevents '.
     41    'access tracking (good for email viewers), bandwidth leeching, '.
     42    'cross-site request forging, goatse.cx posting, and '.
     43    'other nasties, but also results in '.
     44    'a loss of end-user functionality (they can\'t directly post a pic '.
     45    'they posted from Flickr anymore). Use it if you don\'t have a '.
     46    'robust user-content moderation team. This directive has been '.
     47    'available since 1.3.0.'
     48);
     49
     50HTMLPurifier_ConfigSchema::define(
     51    'URI', 'DisableResources', false, 'bool',
     52    'Disables embedding resources, essentially meaning no pictures. You can '.
     53    'still link to them though. See %URI.DisableExternalResources for why '.
     54    'this might be a good idea. This directive has been available since 1.3.0.'
     55);
     56
     57HTMLPurifier_ConfigSchema::define(
     58    'URI', 'Munge', null, 'string/null',
     59    'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
     60    'redirection service. Pass this directive a URI, with %s inserted where '.
     61    'the url-encoded original URI should be inserted (sample: '.
     62    '<code>http://www.google.com/url?q=%s</code>). '.
     63    'This prevents PageRank leaks, while being as transparent as possible '.
     64    'to users (you may also want to add some client side JavaScript to '.
     65    'override the text in the statusbar). Warning: many security experts '.
     66    'believe that this form of protection does not deter spam-bots. '.
     67    'You can also use this directive to redirect users to a splash page '.
     68    'telling them they are leaving your website. '.
     69    'This directive has been available since 1.3.0.'
     70);
     71
     72HTMLPurifier_ConfigSchema::define(
     73    'URI', 'HostBlacklist', array(), 'list',
     74    'List of strings that are forbidden in the host of any URI. Use it to '.
     75    'kill domain names of spam, etc. Note that it will catch anything in '.
     76    'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
     77    'This directive has been available since 1.3.0.'
     78);
     79
    3780/**
    3881 * Validates a URI as defined by RFC 3986.
     
    4487    var $host;
    4588    var $PercentEncoder;
    46     var $embeds;
     89    var $embeds_resource;
    4790   
    4891    /**
    49      * @param $embeds Does the URI here result in an extra HTTP request?
     92     * @param $embeds_resource Does the URI here result in an extra HTTP request?
    5093     */
    51     function HTMLPurifier_AttrDef_URI($embeds = false) {
     94    function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
    5295        $this->host = new HTMLPurifier_AttrDef_Host();
    5396        $this->PercentEncoder = new HTMLPurifier_PercentEncoder();
    54         $this->embeds = (bool) $embeds;
     97        $this->embeds_resource = (bool) $embeds_resource;
    5598    }
    5699   
     
    106149       
    107150       
    108         // the URI we're processing embeds a resource in the page, but the URI
     151        // the URI we're processing embeds a resource in the page, but the URI
    109152        // it references cannot be located
    110         if ($this->embeds && !$scheme_obj->browsable) {
     153        if ($this->embeds_resource && !$scheme_obj->browsable) {
    111154            return false;
    112155        }
     
    115158        if ($authority !== null) {
    116159           
    117             // remove URI if it's absolute and we disallow externals
     160            // remove URI if it's absolute and we disabled externals or
     161            // if it's absolute and embedded and we disabled external resources
    118162            unset($our_host);
    119             if ($config->get('URI', 'DisableExternal')) {
     163            if (
     164                $config->get('URI', 'DisableExternal') ||
     165                (
     166                    $config->get('URI', 'DisableExternalResources') &&
     167                    $this->embeds_resource
     168                )
     169            ) {
    120170                $our_host = $config->get('URI', 'Host');
    121171                if ($our_host === null) return false;
     
    144194            if ($host === false) $host = null;
    145195           
     196            if ($this->checkBlacklist($host, $config, $context)) return false;
     197           
    146198            // more lenient absolute checking
    147199            if (isset($our_host)) {
     
    199251        if ($fragment !== null) $result .= "#$fragment";
    200252       
     253        // munge if necessary
     254        $munge = $config->get('URI', 'Munge');
     255        if (!empty($scheme_obj->browsable) && $munge !== null) {
     256            if ($authority !== null) {
     257                $result = str_replace('%s', rawurlencode($result), $munge);
     258            }
     259        }
     260       
    201261        return $result;
    202262       
    203263    }
    204264   
     265    /**
     266     * Checks a host against an array blacklist
     267     * @param $host Host to check
     268     * @param $config HTMLPurifier_Config instance
     269     * @param $context HTMLPurifier_Context instance
     270     * @return bool Is spam?
     271     */
     272    function checkBlacklist($host, &$config, &$context) {
     273        $blacklist = $config->get('URI', 'HostBlacklist');
     274        if (!empty($blacklist)) {
     275            foreach($blacklist as $blacklisted_host_fragment) {
     276                if (strpos($host, $blacklisted_host_fragment) !== false) {
     277                    return true;
     278                }
     279            }
     280        }
     281        return false;
     282    }
     283   
    205284}
    206285
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier/ChildDef.php

    r6358 r7599  
    2121{
    2222    /**
    23      * Type of child definition, usually right-most part of class name lowercase
    24      *
    25      * Used occasionally in terms of context.  Possible values include
    26      * custom, required, optional and empty.
     23     * Type of child definition, usually right-most part of class name lowercase.
     24     * Used occasionally in terms of context.
     25     * @public
    2726     */
    2827    var $type;
     
    3332     * This is necessary for redundant checking when changes affecting
    3433     * a child node may cause a parent node to now be disallowed.
     34     *
     35     * @public
    3536     */
    3637    var $allow_empty;
     
    3940     * Validates nodes according to definition and returns modification.
    4041     *
     42     * @public
    4143     * @param $tokens_of_children Array of HTMLPurifier_Token
    4244     * @param $config HTMLPurifier_Config object
     
    5153}
    5254
    53 /**
    54  * Custom validation class, accepts DTD child definitions
    55  *
    56  * @warning Currently this class is an all or nothing proposition, that is,
    57  *          it will only give a bool return value.
    58  * @note This class is currently not used by any code, although it is unit
    59  *       tested.
    60  */
    61 class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
    62 {
    63     var $type = 'custom';
    64     var $allow_empty = false;
    65     /**
    66      * Allowed child pattern as defined by the DTD
    67      */
    68     var $dtd_regex;
    69     /**
    70      * PCRE regex derived from $dtd_regex
    71      * @private
    72      */
    73     var $_pcre_regex;
    74     /**
    75      * @param $dtd_regex Allowed child pattern from the DTD
    76      */
    77     function HTMLPurifier_ChildDef_Custom($dtd_regex) {
    78         $this->dtd_regex = $dtd_regex;
    79         $this->_compileRegex();
    80     }
    81     /**
    82      * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
    83      */
    84     function _compileRegex() {
    85         $raw = str_replace(' ', '', $this->dtd_regex);
    86         if ($raw{0} != '(') {
    87             $raw = "($raw)";
    88         }
    89         $reg = str_replace(',', ',?', $raw);
    90         $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
    91         $this->_pcre_regex = $reg;
    92     }
    93     function validateChildren($tokens_of_children, $config, &$context) {
    94         $list_of_children = '';
    95         $nesting = 0; // depth into the nest
    96         foreach ($tokens_of_children as $token) {
    97             if (!empty($token->is_whitespace)) continue;
    98            
    99             $is_child = ($nesting == 0); // direct
    100            
    101             if ($token->type == 'start') {
    102                 $nesting++;
    103             } elseif ($token->type == 'end') {
    104                 $nesting--;
    105             }
    106            
    107             if ($is_child) {
    108                 $list_of_children .= $token->name . ',';
    109             }
    110         }
    111         $list_of_children = rtrim($list_of_children, ',');
    112        
    113         $okay =
    114             preg_match(
    115                 '/^'.$this->_pcre_regex.'$/',
    116                 $list_of_children
    117             );
    118        
    119         return (bool) $okay;
    120     }
    121 }
    122 
    123 /**
    124  * Definition that allows a set of elements, but disallows empty children.
    125  */
    126 class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
    127 {
    128     /**
    129      * Lookup table of allowed elements.
    130      */
    131     var $elements = array();
    132     /**
    133      * @param $elements List of allowed element names (lowercase).
    134      */
    135     function HTMLPurifier_ChildDef_Required($elements) {
    136         if (is_string($elements)) {
    137             $elements = str_replace(' ', '', $elements);
    138             $elements = explode('|', $elements);
    139         }
    140         $elements = array_flip($elements);
    141         foreach ($elements as $i => $x) $elements[$i] = true;
    142         $this->elements = $elements;
    143         $this->gen = new HTMLPurifier_Generator();
    144     }
    145     var $allow_empty = false;
    146     var $type = 'required';
    147     function validateChildren($tokens_of_children, $config, &$context) {
    148         // if there are no tokens, delete parent node
    149         if (empty($tokens_of_children)) return false;
    150        
    151         // the new set of children
    152         $result = array();
    153        
    154         // current depth into the nest
    155         $nesting = 0;
    156        
    157         // whether or not we're deleting a node
    158         $is_deleting = false;
    159        
    160         // whether or not parsed character data is allowed
    161         // this controls whether or not we silently drop a tag
    162         // or generate escaped HTML from it
    163         $pcdata_allowed = isset($this->elements['#PCDATA']);
    164        
    165         // a little sanity check to make sure it's not ALL whitespace
    166         $all_whitespace = true;
    167        
    168         // some configuration
    169         $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
    170        
    171         foreach ($tokens_of_children as $token) {
    172             if (!empty($token->is_whitespace)) {
    173                 $result[] = $token;
    174                 continue;
    175             }
    176             $all_whitespace = false; // phew, we're not talking about whitespace
    177            
    178             $is_child = ($nesting == 0);
    179            
    180             if ($token->type == 'start') {
    181                 $nesting++;
    182             } elseif ($token->type == 'end') {
    183                 $nesting--;
    184             }
    185            
    186             if ($is_child) {
    187                 $is_deleting = false;
    188                 if (!isset($this->elements[$token->name])) {
    189                     $is_deleting = true;
    190                     if ($pcdata_allowed && $token->type == 'text') {
    191                         $result[] = $token;
    192                     } elseif ($pcdata_allowed && $escape_invalid_children) {
    193                         $result[] = new HTMLPurifier_Token_Text(
    194                             $this->gen->generateFromToken($token, $config)
    195                         );
    196                     }
    197                     continue;
    198                 }
    199             }
    200             if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
    201                 $result[] = $token;
    202             } elseif ($pcdata_allowed && $escape_invalid_children) {
    203                 $result[] =
    204                     new HTMLPurifier_Token_Text(
    205                         $this->gen->generateFromToken( $token, $config )
    206                     );
    207             } else {
    208                 // drop silently
    209             }
    210         }
    211         if (empty($result)) return false;
    212         if ($all_whitespace) return false;
    213         if ($tokens_of_children == $result) return true;
    214         return $result;
    215     }
    216 }
    217 
    218 /**
    219  * Definition that allows a set of elements, and allows no children.
    220  * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
    221  *       really, one shouldn't inherit from the other.  Only altered behavior
    222  *       is to overload a returned false with an array.  Thus, it will never
    223  *       return false.
    224  */
    225 class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
    226 {
    227     var $allow_empty = true;
    228     var $type = 'optional';
    229     function validateChildren($tokens_of_children, $config, &$context) {
    230         $result = parent::validateChildren($tokens_of_children, $config, $context);
    231         if ($result === false) return array();
    232         return $result;
    233     }
    234 }
    235 
    236 /**
    237  * Definition that disallows all elements.
    238  * @warning validateChildren() in this class is actually never called, because
    239  *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
    240  *          before child definitions are parsed in earnest by
    241  *          HTMLPurifier_Strategy_FixNesting.
    242  */
    243 class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
    244 {
    245     var $allow_empty = true;
    246     var $type = 'empty';
    247     function HTMLPurifier_ChildDef_Empty() {}
    248     function validateChildren($tokens_of_children, $config, &$context) {
    249         return array();
    250     }
    251 }
    252 
    253 /**
    254  * Definition that uses different definitions depending on context.
    255  *
    256  * The del and ins tags are notable because they allow different types of
    257  * elements depending on whether or not they're in a block or inline context.
    258  * Chameleon allows this behavior to happen by using two different
    259  * definitions depending on context.  While this somewhat generalized,
    260  * it is specifically intended for those two tags.
    261  */
    262 class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
    263 {
    264    
    265     /**
    266      * Instance of the definition object to use when inline. Usually stricter.
    267      */
    268     var $inline;
    269     /**
    270      * Instance of the definition object to use when block.
    271      */
    272     var $block;
    273    
    274     /**
    275      * @param $inline List of elements to allow when inline.
    276      * @param $block List of elements to allow when block.
    277      */
    278     function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
    279         $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
    280         $this->block  = new HTMLPurifier_ChildDef_Optional($block);
    281     }
    282    
    283     function validateChildren($tokens_of_children, $config, &$context) {
    284         $parent_type = $context->get('ParentType');
    285         switch ($parent_type) {
    286             case 'unknown':
    287             case 'inline':
    288                 $result = $this->inline->validateChildren(
    289                     $tokens_of_children, $config, $context);
    290                 break;
    291             case 'block':
    292                 $result = $this->block->validateChildren(
    293                     $tokens_of_children, $config, $context);
    294                 break;
    295             default:
    296                 trigger_error('Invalid context', E_USER_ERROR);
    297                 return false;
    298         }
    299         return $result;
    300     }
    301 }
    302 
    303 /**
    304  * Definition for tables
    305  */
    306 class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
    307 {
    308     var $allow_empty = false;
    309     var $type = 'table';
    310     function HTMLPurifier_ChildDef_Table() {}
    311     function validateChildren($tokens_of_children, $config, &$context) {
    312         if (empty($tokens_of_children)) return false;
    313        
    314         // this ensures that the loop gets run one last time before closing
    315         // up. It's a little bit of a hack, but it works! Just make sure you
    316         // get rid of the token later.
    317         $tokens_of_children[] = false;
    318        
    319         // only one of these elements is allowed in a table
    320         $caption = false;
    321         $thead   = false;
    322         $tfoot   = false;
    323        
    324         // as many of these as you want
    325         $cols    = array();
    326         $content = array();
    327        
    328         $nesting = 0; // current depth so we can determine nodes
    329         $is_collecting = false; // are we globbing together tokens to package
    330                                 // into one of the collectors?
    331         $collection = array(); // collected nodes
    332         $tag_index = 0; // the first node might be whitespace,
    333                             // so this tells us where the start tag is
    334        
    335         foreach ($tokens_of_children as $token) {
    336             $is_child = ($nesting == 0);
    337            
    338             if ($token === false) {
    339                 // terminating sequence started
    340             } elseif ($token->type == 'start') {
    341                 $nesting++;
    342             } elseif ($token->type == 'end') {
    343                 $nesting--;
    344             }
    345            
    346             // handle node collection
    347             if ($is_collecting) {
    348                 if ($is_child) {
    349                     // okay, let's stash the tokens away
    350                     // first token tells us the type of the collection
    351                     switch ($collection[$tag_index]->name) {
    352                         case 'tr':
    353                         case 'tbody':
    354                             $content[] = $collection;
    355                             break;
    356                         case 'caption':
    357                             if ($caption !== false) break;
    358                             $caption = $collection;
    359                             break;
    360                         case 'thead':
    361                         case 'tfoot':
    362                             // access the appropriate variable, $thead or $tfoot
    363                             $var = $collection[$tag_index]->name;
    364                             if ($$var === false) {
    365                                 $$var = $collection;
    366                             } else {
    367                                 // transmutate the first and less entries into
    368                                 // tbody tags, and then put into content
    369                                 $collection[$tag_index]->name = 'tbody';
    370                                 $collection[count($collection)-1]->name = 'tbody';
    371                                 $content[] = $collection;
    372                             }
    373                             break;
    374                          case 'colgroup':
    375                             $cols[] = $collection;
    376                             break;
    377                     }
    378                     $collection = array();
    379                     $is_collecting = false;
    380                     $tag_index = 0;
    381                 } else {
    382                     // add the node to the collection
    383                     $collection[] = $token;
    384                 }
    385             }
    386            
    387             // terminate
    388             if ($token === false) break;
    389            
    390             if ($is_child) {
    391                 // determine what we're dealing with
    392                 if ($token->name == 'col') {
    393                     // the only empty tag in the possie, we can handle it
    394                     // immediately
    395                     $cols[] = array_merge($collection, array($token));
    396                     $collection = array();
    397                     $tag_index = 0;
    398                     continue;
    399                 }
    400                 switch($token->name) {
    401                     case 'caption':
    402                     case 'colgroup':
    403                     case 'thead':
    404                     case 'tfoot':
    405                     case 'tbody':
    406                     case 'tr':
    407                         $is_collecting = true;
    408                         $collection[] = $token;
    409                         continue;
    410                     default:
    411                         if ($token->type == 'text' && $token->is_whitespace) {
    412                             $collection[] = $token;
    413                             $tag_index++;
    414                         }
    415                         continue;
    416                 }
    417             }
    418         }
    419        
    420         if (empty($content)) return false;
    421        
    422         $ret = array();
    423         if ($caption !== false) $ret = array_merge($ret, $caption);
    424         if ($cols !== false)    foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
    425         if ($thead !== false)   $ret = array_merge($ret, $thead);
    426         if ($tfoot !== false)   $ret = array_merge($ret, $tfoot);
    427         foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
    428         if (!empty($collection) && $is_collecting == false){
    429             // grab the trailing space
    430             $ret = array_merge($ret, $collection);
    431         }
    432        
    433         array_pop($tokens_of_children); // remove phantom token
    434        
    435         return ($ret === $tokens_of_children) ? true : $ret;
    436        
    437     }
    438 }
    439 
    44055?>
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier/Config.php

    r7584 r7599  
    7070   
    7171    /**
     72     * Retrieves an array of directives to values from a given namespace
     73     * @param $namespace String namespace
     74     */
     75    function getBatch($namespace) {
     76        if (!isset($this->def->info[$namespace])) {
     77            trigger_error('Cannot retrieve undefined namespace',
     78                E_USER_WARNING);
     79            return;
     80        }
     81        return $this->conf[$namespace];
     82    }
     83   
     84    /**
    7285     * Sets a value to configuration.
    7386     * @param $namespace String namespace
     
    135148    function loadArray($config_array) {
    136149        foreach ($config_array as $key => $value) {
     150            $key = str_replace('_', '.', $key);
    137151            if (strpos($key, '.') !== false) {
    138152                // condensed form
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier/ConfigSchema.php

    r7584 r7599  
    248248                if (is_int($var) && ($var === 0 || $var === 1)) {
    249249                    $var = (bool) $var;
     250                } elseif (is_string($var)) {
     251                    if ($var == 'on' || $var == 'true' || $var == '1') {
     252                        $var = true;
     253                    } elseif ($var == 'off' || $var == 'false' || $var == '0') {
     254                        $var = false;
     255                    } else {
     256                        break;
     257                    }
    250258                } elseif (!is_bool($var)) break;
    251259                return $var;
     
    253261            case 'hash':
    254262            case 'lookup':
     263                if (is_string($var)) {
     264                    // simplistic string to array method that only works
     265                    // for simple lists of tag names or alphanumeric characters
     266                    $var = explode(',',$var);
     267                    // remove spaces
     268                    foreach ($var as $i => $j) $var[$i] = trim($j);
     269                }
    255270                if (!is_array($var)) break;
    256271                $keys = array_keys($var);
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php

    r7584 r7599  
    1919    require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
    2020require_once 'HTMLPurifier/ChildDef.php';
     21    require_once 'HTMLPurifier/ChildDef/Chameleon.php';
     22    require_once 'HTMLPurifier/ChildDef/Empty.php';
     23    require_once 'HTMLPurifier/ChildDef/Required.php';
     24    require_once 'HTMLPurifier/ChildDef/Optional.php';
     25    require_once 'HTMLPurifier/ChildDef/Table.php';
     26    require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
    2127require_once 'HTMLPurifier/Generator.php';
    2228require_once 'HTMLPurifier/Token.php';
     
    3440    'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
    3541    'versions.'
     42);
     43
     44HTMLPurifier_ConfigSchema::define(
     45    'HTML', 'Strict', false, 'bool',
     46    'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
     47    'This directive has been available since 1.3.0.'
     48);
     49
     50HTMLPurifier_ConfigSchema::define(
     51    'HTML', 'BlockWrapper', 'p', 'string',
     52    'String name of element to wrap inline elements that are inside a block '.
     53    'context.  This only occurs in the children of blockquote in strict mode. '.
     54    'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
     55    'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
     56    '<code>&lt;p&gt;</code> tags can be replaced '.
     57    'with whatever you desire, as long as it is a block level element. '.
     58    'This directive has been available since 1.3.0.'
     59);
     60
     61HTMLPurifier_ConfigSchema::define(
     62    'HTML', 'Parent', 'div', 'string',
     63    'String name of element that HTML fragment passed to library will be '.
     64    'inserted in.  An interesting variation would be using span as the '.
     65    'parent element, meaning that only inline tags would be allowed. '.
     66    'This directive has been available since 1.3.0.'
     67);
     68
     69HTMLPurifier_ConfigSchema::define(
     70    'HTML', 'AllowedElements', null, 'lookup/null',
     71    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
     72    'can overload it with your own list of tags to allow.  Note that this '.
     73    'method is subtractive: it does its job by taking away from HTML Purifier '.
     74    'usual feature set, so you cannot add a tag that HTML Purifier never '.
     75    'supported in the first place (like embed).  If you change this, you '.
     76    'probably also want to change %HTML.AllowedAttributes. '.
     77    '<strong>Warning:</strong> If another directive conflicts with the '.
     78    'elements here, <em>that</em> directive will win and override. '.
     79    'This directive has been available since 1.3.0.'
     80);
     81
     82HTMLPurifier_ConfigSchema::define(
     83    'HTML', 'AllowedAttributes', null, 'lookup/null',
     84    'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
     85    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
     86    '(style, id, class, dir, lang, xml:lang).'.
     87    '<strong>Warning:</strong> If another directive conflicts with the '.
     88    'elements here, <em>that</em> directive will win and override. For '.
     89    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
     90    'directive.  You must set that directive to true before you can use '.
     91    'IDs at all. This directive has been available since 1.3.0.'
     92);
     93
     94HTMLPurifier_ConfigSchema::define(
     95    'Attr', 'DisableURI', false, 'bool',
     96    'Disables all URIs in all forms. Not sure why you\'d want to do that '.
     97    '(after all, the Internet\'s founded on the notion of a hyperlink). '.
     98    'This directive has been available since 1.3.0.'
    3699);
    37100
     
    70133    /**
    71134     * String name of parent element HTML will be going into.
    72      * @todo Allow this to be overloaded by user config
    73135     * @public
    74136     */
     
    76138   
    77139    /**
     140     * Definition for parent element, allows parent element to be a
     141     * tag that's not allowed inside the HTML fragment.
     142     * @public
     143     */
     144    var $info_parent_def;
     145   
     146    /**
     147     * String name of element used to wrap inline elements in block context
     148     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     149     * @public
     150     */
     151    var $info_block_wrapper = 'p';
     152   
     153    /**
    78154     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
    79155     * @public
     
    94170   
    95171    /**
     172     * Lookup table of flow elements
     173     * @public
     174     */
     175    var $info_flow_elements = array();
     176   
     177    /**
     178     * Boolean: whether this is a strict definition
     179     * @public
     180     */
     181    var $strict;
     182   
     183    /**
    96184     * Initializes the definition, the meat of the class.
    97185     */
    98186    function setup($config) {
    99187       
    100         // emulates the structure of the DTD
    101         // these are condensed, however, with bad stuff taken out
    102         // screening process was done by hand
     188        // some cached config values
     189        $this->strict = $config->get('HTML', 'Strict');
    103190       
    104191        //////////////////////////////////////////////////////////////////////
     
    112199                'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
    113200                'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
    114                 'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small', 'u', 's',
    115                 'strike', 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
     201                'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
     202                'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
    116203                'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
    117204                'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
     
    119206            );
    120207       
     208        if (!$this->strict) {
     209            $allowed_tags[] = 'u';
     210            $allowed_tags[] = 's';
     211            $allowed_tags[] = 'strike';
     212        }
     213       
    121214        foreach ($allowed_tags as $tag) {
    122215            $this->info[$tag] = new HTMLPurifier_ElementDef();
     
    125218        //////////////////////////////////////////////////////////////////////
    126219        // info[]->child : defines allowed children for elements
     220       
     221        // emulates the structure of the DTD
     222        // however, these are condensed, with bad stuff taken out
     223        // screening process was done by hand
    127224       
    128225        // entities: prefixed with e_ and _ replaces . from DTD
     
    149246          ' | cite | abbr | acronym';
    150247        $e_phrase = "$e_phrase_basic | $e_phrase_extra";
    151         $e_inline_forms = ''; // humor the dtd
    152248        $e_misc_inline = 'ins | del';
    153249        $e_misc = "$e_misc_inline";
    154         $e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
    155           " | $e_inline_forms";
     250        $e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
    156251        // pseudo-property we created for convenience, see later on
    157252        $e__inline = "#PCDATA | $e_inline | $e_misc_inline";
     
    162257        $e_blocktext = 'pre | hr | blockquote | address';
    163258        $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
     259        $e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
    164260        $e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
    165261        $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
    166262        $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
    167           " | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms".
    168           " | $e_misc_inline");
     263          " | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
    169264        $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
    170265          " | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
    171           " | $e_inline_forms | $e_misc_inline");
     266          " | $e_misc_inline");
    172267        $e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
    173268        $e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
     
    177272            new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
    178273       
    179         $this->info['blockquote']->child=
    180274        $this->info['dd']->child  =
    181275        $this->info['li']->child  =
    182276        $this->info['div']->child = $e_Flow;
     277       
     278        if ($this->strict) {
     279            $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
     280        } else {
     281            $this->info['blockquote']->child = $e_Flow;
     282        }
    183283       
    184284        $this->info['caption']->child   =
     
    221321        $this->info['dl']->child   = new HTMLPurifier_ChildDef_Required('dt|dd');
    222322       
    223         $this->info['address']->child =
    224           new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
    225               " | $e_misc_inline");
     323        if ($this->strict) {
     324            $this->info['address']->child = $e_Inline;
     325        } else {
     326            $this->info['address']->child =
     327              new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
     328                  " | $e_misc_inline");
     329        }
    226330       
    227331        $this->info['img']->child  =
     
    251355        // reuses $e_Inline and $e_Block
    252356        foreach ($e_Inline->elements as $name => $bool) {
    253             if ($name == '#PCDATA' || $name == '') continue;
     357            if ($name == '#PCDATA') continue;
    254358            $this->info[$name]->type = 'inline';
    255359        }
    256360       
    257         $e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
    258361        foreach ($e_Block->elements as $name => $bool) {
    259362            $this->info[$name]->type = 'block';
     363        }
     364       
     365        foreach ($e_Flow->elements as $name => $bool) {
     366            $this->info_flow_elements[$name] = true;
    260367        }
    261368       
     
    349456        $this->info['th']->attr['colspan'] = $e__NumberSpan;
    350457       
    351         $e_URI = new HTMLPurifier_AttrDef_URI();
    352         $this->info['a']->attr['href'] =
    353         $this->info['img']->attr['longdesc'] =
    354         $this->info['del']->attr['cite'] =
    355         $this->info['ins']->attr['cite'] =
    356         $this->info['blockquote']->attr['cite'] =
    357         $this->info['q']->attr['cite'] = $e_URI;
    358        
    359         // URI that causes HTTP request
    360         $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
     458        if (!$config->get('Attr', 'DisableURI')) {
     459            $e_URI = new HTMLPurifier_AttrDef_URI();
     460            $this->info['a']->attr['href'] =
     461            $this->info['img']->attr['longdesc'] =
     462            $this->info['del']->attr['cite'] =
     463            $this->info['ins']->attr['cite'] =
     464            $this->info['blockquote']->attr['cite'] =
     465            $this->info['q']->attr['cite'] = $e_URI;
     466           
     467            // URI that causes HTTP request
     468            $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
     469        }
     470       
     471        if (!$this->strict) {
     472            $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
     473            $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
     474        }
    361475       
    362476        //////////////////////////////////////////////////////////////////////
     
    423537        }
    424538       
     539        //////////////////////////////////////////////////////////////////////
     540        // info_block_wrapper : wraps inline elements in block context
     541       
     542        $block_wrapper = $config->get('HTML', 'BlockWrapper');
     543        if (isset($e_Block->elements[$block_wrapper])) {
     544            $this->info_block_wrapper = $block_wrapper;
     545        } else {
     546            trigger_error('Cannot use non-block element as block wrapper.',
     547                E_USER_ERROR);
     548        }
     549       
     550        //////////////////////////////////////////////////////////////////////
     551        // info_parent : parent element of the HTML fragment
     552       
     553        $parent = $config->get('HTML', 'Parent');
     554        if (isset($this->info[$parent])) {
     555            $this->info_parent = $parent;
     556        } else {
     557            trigger_error('Cannot use unrecognized element as parent.',
     558                E_USER_ERROR);
     559        }
     560        $this->info_parent_def = $this->info[$this->info_parent];
     561       
     562        //////////////////////////////////////////////////////////////////////
     563        // %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
     564        $allowed_elements = $config->get('HTML', 'AllowedElements');
     565        if (is_array($allowed_elements)) {
     566            // $allowed_elements[$this->info_parent] = true; // allow parent element
     567            foreach ($this->info as $name => $d) {
     568                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
     569            }
     570        }
     571        $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
     572        if (is_array($allowed_attributes)) {
     573            foreach ($this->info_global_attr as $attr => $info) {
     574                if (!isset($allowed_attributes["*.$attr"])) {
     575                    unset($this->info_global_attr[$attr]);
     576                }
     577            }
     578            foreach ($this->info as $tag => $info) {
     579                foreach ($info->attr as $attr => $attr_info) {
     580                    if (!isset($allowed_attributes["$tag.$attr"])) {
     581                        unset($this->info[$tag]->attr[$attr]);
     582                    }
     583                }
     584            }
     585        }
    425586    }
    426587   
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier/Strategy/FixNesting.php

    r6358 r7599  
    105105                $parent_index = $stack[$count-1];
    106106                $parent_name  = $tokens[$parent_index]->name;
    107                 $parent_def   = $definition->info[$parent_name];
     107                if ($parent_index == 0) {
     108                    $parent_def   = $definition->info_parent_def;
     109                } else {
     110                    $parent_def   = $definition->info[$parent_name];
     111                }
    108112            } else {
    109113                // unknown info, it won't be used anyway
     
    142146                // there is an exclusion, remove the entire node
    143147                $result = false;
     148                $excludes = array(); // not used, but good to initialize anyway
    144149            } else {
    145150                // DEFINITION CALL
    146                 $def = $definition->info[$tokens[$i]->name];
     151                if ($i === 0) {
     152                    // special processing for the first node
     153                    $def = $definition->info_parent_def;
     154                } else {
     155                    $def = $definition->info[$tokens[$i]->name];
     156                   
     157                }
     158               
    147159                $child_def = $def->child;
    148160               
     
    229241            // Test if the token indeed is a start tag, if not, move forward
    230242            // and test again.
     243            $size = count($tokens);
    231244            while ($i < $size and $tokens[$i]->type != 'start') {
    232245                if ($tokens[$i]->type == 'end') {
     
    235248                    // pop an exclusion lookup off exclusion stack if
    236249                    // we ended node and that node had exclusions
    237                     if ($definition->info[$tokens[$i]->name]->excludes) {
     250                    if ($i == 0 || $i == $size - 1) {
     251                        // use specialized var if it's the super-parent
     252                        $s_excludes = $definition->info_parent_def->excludes;
     253                    } else {
     254                        $s_excludes = $definition->info[$tokens[$i]->name]->excludes;
     255                    }
     256                    if ($s_excludes) {
    238257                        array_pop($exclude_stack);
    239258                    }
  • _plugins_/_dev_/_ze_laboratoire_/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php

    r7584 r7599  
    55require_once 'HTMLPurifier/Generator.php';
    66require_once 'HTMLPurifier/TagTransform.php';
     7
     8HTMLPurifier_ConfigSchema::define(
     9    'Core', 'RemoveInvalidImg', true, 'bool',
     10    'This directive enables pre-emptive URI checking in <code>img</code> '.
     11    'tags, as the attribute validation strategy is not authorized to '.
     12    'remove elements from the document.  This directive has been available '.
     13    'since 1.3.0; revert to pre-1.3.0 behavior by setting it to false.'
     14);
    715
    816/**
     
    2634                // DEFINITION CALL
    2735                if (isset($definition->info[$token->name])) {
    28                     // leave untouched
     36                    // leave untouched, except for a few special cases:
     37                   
     38                    // hard-coded image special case, pre-emptively drop
     39                    // if not available. Probably not abstract-able
     40                    if ( $token->name == 'img' ) {
     41                        if (!isset($token->attr['src'])) continue;
     42                        if (!isset($definition->info['img']->attr['src'])) {
     43                            continue;
     44                        }
     45                        $token->attr['src'] =
     46                            $definition->
     47                                info['img']->
     48                                    attr['src']->
     49                                        validate($token->attr['src']);
     50                        if ($token->attr['src'] === false) continue;
     51                    }
     52                   
    2953                } elseif (
    3054                    isset($definition->info_tag_transform[$token->name])
Note: See TracChangeset for help on using the changeset viewer.