Changeset 54631 in spip-zone


Ignore:
Timestamp:
Nov 19, 2011, 3:13:11 PM (8 years ago)
Author:
cedric@…
Message:

mise à jour de la librairie PHP-Readability

Location:
_plugins_/readability
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • _plugins_/readability/lib/readability/Readability.php

    r45302 r54631  
    1111* License: Apache License, Version 2.0
    1212* Requires: PHP5
    13 * Date: 2010-10-29
     13* Date: 2011-07-22
    1414*
    1515* Differences between the PHP port and the original
     
    4747
    4848// Alternative usage (for testing only!)
    49 // uncomment the lins below and call Readability.php in your browser
     49// uncomment the lines below and call Readability.php in your browser
    5050// passing it the URL of the page you'd like content from, e.g.:
    5151// Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php
     
    110110        {
    111111                /* Turn all double br's into p's */
    112                 /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
    113112                $html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
    114113                $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html);
     
    117116                $this->dom->preserveWhiteSpace = false;
    118117                $this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
     118                if (trim($html) == '') $html = '<html></html>';
    119119                @$this->dom->loadHTML($html);
    120120                $this->url = $url;
     
    151151        public function init()
    152152        {
     153                if (!isset($this->dom->documentElement)) return false;
    153154                $this->removeScripts($this->dom);
    154155                //die($this->getInnerHTML($this->dom->documentElement));
     
    294295                        $this->dom->documentElement->appendChild($this->body);
    295296                }
    296                
    297297                $this->body->setAttribute('id', 'readabilityBody');
    298298
     
    665665                {
    666666                        $topCandidate = $this->dom->createElement('div');
    667                         $topCandidate->innerHTML = ($page instanceof DOMDocument) ? $page->saveXML($page->documentElement) : $page->innerHTML;
    668                         $page->innerHTML = '';
    669                         $page->appendChild($topCandidate);
     667                        if ($page instanceof DOMDocument) {
     668                                if (!isset($page->documentElement)) {
     669                                        // we don't have a body either? what a mess! :)
     670                                } else {
     671                                        $topCandidate->innerHTML = $page->documentElement->innerHTML;
     672                                        $page->documentElement->innerHTML = '';
     673                                        $page->documentElement->appendChild($topCandidate);
     674                                }
     675                        } else {
     676                                $topCandidate->innerHTML = $page->innerHTML;
     677                                $page->innerHTML = '';
     678                                $page->appendChild($topCandidate);
     679                        }
    670680                        $this->initializeNode($topCandidate);
    671681                }
     
    679689                $siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2);
    680690                $siblingNodes          = $topCandidate->parentNode->childNodes;
     691                if (!isset($siblingNodes)) {
     692                        $siblingNodes = new stdClass;
     693                        $siblingNodes->length = 0;
     694                }
    681695
    682696                for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++)
     
    770784                if (strlen($this->getInnerText($articleContent, false)) < 250)
    771785                {
     786                        // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7
     787                        // in the meantime, we check and create an empty element if it's not there.
     788                        if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body');
    772789                        $this->body->innerHTML = $this->bodyCache;
    773790                       
     
    847864        */
    848865        public function cleanStyles($e) {
     866                if (!is_object($e)) return;
    849867                $elems = $e->getElementsByTagName('*');
    850868                foreach ($elems as $elem) {
  • _plugins_/readability/plugin.xml

    r54585 r54631  
    44        <auteur>Arc90, Keyvan Minoukadeh, Fil</auteur>
    55        <licence>Apache License Version 2.0</licence>
    6         <version>0.1.1</version>
     6        <version>0.2.0</version>
    77        <etat>test</etat>
    88        <lien>http://www.spip-contrib.net/3758</lien>
Note: See TracChangeset for help on using the changeset viewer.