Documentation is available at Parse.php
- <?php
- # Copyright (c) 2002-2005 Cunningham & Cunningham, Inc.
- # Released under the terms of the GNU General Public License version 2 or later.
- #
- # PHP5 translation by Luis A. Floreani <luis.floreani@gmail.com>
/**
 * A node in a tree of parsed HTML tags (by default table / tr / td).
 *
 * Each node keeps the text before its opening tag ($leader), the opening tag
 * itself ($tag), the content between the tags ($body), the closing tag ($end)
 * and whatever follows it ($trailer). Child nodes hang off $parts, following
 * siblings off $more, so that toString() can reproduce the parsed input.
 */
class Parse {
    /**
     * Text that precedes the opening tag.
     * @var string
     */
    public $leader;

    /**
     * The opening tag, including its angle brackets.
     * @var string
     */
    public $tag;

    /**
     * Content between opening and closing tag; null once it has been parsed
     * into $parts.
     * @var string|null
     */
    public $body;

    /**
     * The closing tag, including its angle brackets.
     * @var string
     */
    public $end;

    /**
     * Text that follows the closing tag; null once it has been parsed into
     * $more.
     * @var string|null
     */
    public $trailer;

    /**
     * First child node (next entry of the tag list), or null.
     * @var Parse|null
     */
    public $parts;

    /**
     * Next sibling node (same tag), or null.
     * @var Parse|null
     */
    public $more;

    /**
     * Default tag hierarchy, outermost first.
     * @var array
     */
    public static $tags = array('table', 'tr', 'td');

    /**
     * Parses $text, or - when $simple is true - builds a node by hand.
     *
     * In simple mode the arguments are reinterpreted: $text is the tag name,
     * $tags the body, $level the child node and $offset the sibling node.
     *
     * @param string           $text   markup to parse (simple mode: tag name)
     * @param array|string     $tags   tag hierarchy; an empty value selects
     *                                 Parse::$tags (simple mode: body text)
     * @param int|Parse|null   $level  index into $tags (simple mode: parts)
     * @param int|Parse|null   $offset offset of $text in the whole document,
     *                                 reported in exceptions (simple mode: more)
     * @param bool             $simple true to build the node without parsing
     * @throws ParseException if an opening or closing tag is not found
     */
    public function __construct($text, $tags = null, $level = 0, $offset = 0, $simple = false) {
        if ($simple === true) {
            $this->leader = "\n";
            $this->tag = '<' . $text . '>';
            $this->body = $tags;
            $this->end = '</' . $text . '>';
            $this->trailer = '';
            // The int defaults (0) are not nodes; store null instead.
            $this->parts = is_object($level) ? $level : null;
            $this->more = is_object($offset) ? $offset : null;
            return;
        }

        if ($tags == null) {
            $tags = self::$tags;
        }

        $text = (string) $text;
        $name = $tags[$level];

        // Locate each delimiter in turn. A failed search must stay `false`
        // (adding 1 to it first would hide the failure), so the offsets are
        // only advanced past '>' after every search has succeeded.
        $startTag = stripos($text, '<' . $name);
        $endTag = ($startTag === false) ? false : stripos($text, '>', $startTag);
        $startEnd = ($endTag === false) ? false : stripos($text, '</' . $name, $endTag + 1);
        $endEnd = ($startEnd === false) ? false : stripos($text, '>', $startEnd);

        if ($endEnd === false) {
            throw new ParseException("Can't find tag: " . $name, $offset);
        }

        $endTag++;
        $endEnd++;
        $startMore = stripos($text, '<' . $name, $endEnd);

        $this->leader = substr($text, 0, $startTag);
        $this->tag = substr($text, $startTag, $endTag - $startTag);
        $this->body = substr($text, $endTag, $startEnd - $endTag);
        $this->end = substr($text, $startEnd, $endEnd - $startEnd);
        $this->trailer = substr($text, $endEnd);

        if ($level + 1 < count($tags)) {
            // Not the innermost tag yet: the body holds the next level.
            $this->parts = new Parse($this->body, $tags, $level + 1, $offset + $endTag);
            $this->body = null;
        } else if (stripos($this->body, '<' . $tags[0]) !== false) {
            // Innermost tag whose body starts the hierarchy over again.
            $this->parts = new Parse($this->body, $tags, 0, $offset + $endTag);
            $this->body = '';
        }

        if ($startMore !== false) {
            // Another tag of the same kind follows: parse it as a sibling.
            $this->more = new Parse($this->trailer, $tags, $level, $offset + $endEnd);
            $this->trailer = null;
        }
    }

    /**
     * Counts this node plus all siblings that follow it.
     *
     * @return int
     */
    public function size() {
        return ($this->more == null) ? 1 : $this->more->size() + 1;
    }

    /**
     * Returns the last node of the sibling chain.
     *
     * @return Parse
     */
    public function last() {
        return ($this->more == null) ? $this : $this->more->last();
    }

    /**
     * Follows $parts down to the node that has no children.
     *
     * @return Parse
     */
    public function leaf() {
        return ($this->parts == null) ? $this : $this->parts->leaf();
    }

    /**
     * Navigates by index: the $i-th sibling, then optionally the $j-th child
     * of that node, then optionally the $k-th child of that one. An index
     * beyond the end of a sibling chain yields the last sibling.
     *
     * @param int      $i
     * @param int|null $j
     * @param int|null $k
     * @return Parse
     */
    public function at($i, $j = null, $k = null) {
        if ($j === null) {
            return ($i == 0 || $this->more == null) ? $this : $this->more->at($i - 1);
        }
        if ($k === null) {
            return $this->at($i)->parts->at($j);
        }
        return $this->at($i, $j)->parts->at($k);
    }

    /**
     * Returns the body converted to plain text.
     *
     * @return string
     */
    public function text() {
        return self::htmlToText($this->body);
    }

    /**
     * Converts an HTML fragment to plain text: line breaks are normalised,
     * other tags removed, whitespace condensed and entities unescaped.
     *
     * @param string|null $s
     * @return string
     */
    public static function htmlToText($s) {
        // A node whose body was parsed into parts has a null body.
        $s = (string) $s;
        $s = self::normalizeLineBreaks($s);
        $s = self::removeNonBreakTags($s);
        $s = self::condenseWhitespace($s);
        $s = self::unescape($s);
        return $s;
    }

    /**
     * Turns normalised "<br />" tags into newlines and unescapes entities
     * and smart quotes.
     *
     * @param string $s
     * @return string
     */
    public static function unescape($s) {
        $s = str_replace("<br />", "\n", (string) $s);
        $s = self::unescapeEntities($s);
        $s = self::unescapeSmartQuotes($s);
        return $s;
    }

    /**
     * Replaces the entities &lt; &gt; &nbsp; &quot; and &amp; by the
     * characters they stand for.
     *
     * @param string $s
     * @return string
     */
    private static function unescapeEntities($s) {
        // str_replace() applies the pairs in order; &amp; goes last so that
        // "&amp;lt;" ends up as the literal text "&lt;".
        return str_replace(
            array('&lt;', '&gt;', '&nbsp;', '&quot;', '&amp;'),
            array('<', '>', ' ', '"', '&'),
            $s
        );
    }

    /**
     * Replaces curly ("smart") quotes by their plain ASCII counterparts.
     *
     * Valid UTF-8 input has U+201C/U+201D/U+2018/U+2019 replaced; any other
     * input is treated as single-byte text and has the bytes 0x93/0x94/0x91/
     * 0x92 replaced.
     *
     * NOTE(review): the single-byte values are assumed to be the Windows-1252
     * curly quotes the legacy ereg_replace() calls were aimed at - confirm
     * against the original file.
     *
     * @param string $s
     * @return string
     */
    public static function unescapeSmartQuotes($s) {
        $s = (string) $s;
        $plain = array('"', '"', "'", "'");
        if (self::isUtf8($s)) {
            return str_replace(
                array("\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x98", "\xE2\x80\x99"),
                $plain,
                $s
            );
        }
        return str_replace(array("\x93", "\x94", "\x91", "\x92"), $plain, $s);
    }

    /**
     * Rewrites every "<br>" variant, and every "</p><p ...>" pair, to the
     * canonical "<br />".
     *
     * @param string $s
     * @return string
     */
    private static function normalizeLineBreaks($s) {
        $s = preg_replace('|<\s*br\s*/?\s*>|s', '<br />', $s);
        $s = preg_replace('|<\s*/\s*p\s*>\s*<\s*p( .*?)?>|s', '<br />', $s);
        return $s;
    }

    /**
     * Collapses whitespace runs to a single space, turns non-breaking spaces
     * (the character and the &nbsp; entity) into spaces and trims the result.
     *
     * @param string $s
     * @return string
     */
    public static function condenseWhitespace($s) {
        $s = (string) $s;
        // Byte 0xA0 is only a non-breaking space in single-byte text; in
        // UTF-8 it is part of multi-byte characters, so match U+00A0 there.
        $nonBreakingSpace = self::isUtf8($s) ? "\xC2\xA0" : "\xA0";
        $s = preg_replace('|\s+|s', ' ', $s);
        $s = str_replace(array($nonBreakingSpace, '&nbsp;'), ' ', $s);
        return trim($s, " \t\n\r");
    }

    /**
     * Removes every "<...>" tag except the canonical "<br />".
     *
     * @param string $s
     * @return string
     */
    private static function removeNonBreakTags($s) {
        $i = strpos($s, '<');
        while ($i !== false) {
            $j = strpos($s, '>', $i + 1);
            if ($j === false) {
                // Unterminated tag: leave the rest of the text alone.
                break;
            }
            if (substr($s, $i, $j + 1 - $i) !== '<br />') {
                // Cut the tag out; $i now points at what followed it.
                $s = substr($s, 0, $i) . substr($s, $j + 1);
            } else {
                // Keep the line break and continue searching after its '<'.
                $i++;
            }
            $i = strpos($s, '<', $i);
        }
        return $s;
    }

    /**
     * Tells whether $s is valid UTF-8 (pure ASCII counts as valid).
     *
     * @param string $s
     * @return bool
     */
    private static function isUtf8($s) {
        // With the u modifier preg_match() fails on malformed UTF-8 subjects.
        return preg_match('//u', $s) === 1;
    }

    /**
     * Inserts $text just before the closing '>' of the opening tag.
     *
     * @param string $text
     */
    public function addToTag($text) {
        $last = strlen($this->tag) - 1;
        $this->tag = substr($this->tag, 0, $last) . $text . '>';
    }

    /**
     * Appends $text to the body.
     *
     * @param string $text
     */
    public function addToBody($text) {
        $this->body = $this->body . $text;
    }

    /**
     * Serialises this node, its children and its following siblings back
     * into markup.
     *
     * @return string
     */
    public function toString() {
        $out = $this->leader;
        $out .= $this->tag;
        if ($this->parts != null) {
            $out .= $this->parts->toString();
        } else {
            $out .= $this->body;
        }
        $out .= $this->end;
        if ($this->more != null) {
            $out .= $this->more->toString();
        } else {
            $out .= $this->trailer;
        }
        return $out;
    }
}
/**
 * Exception carrying the character offset at which the problem was detected.
 */
class ParseException extends Exception {
    /**
     * Offset supplied by the code that raised the exception.
     * @var int
     */
    protected $offset = 0;

    /**
     * @param string $message description of the failure
     * @param int    $offset  position to report alongside the message
     */
    public function __construct($message, $offset) {
        parent::__construct($message);
        $this->offset = $offset;
    }

    /**
     * Renders the exception as "<class> <message> at <offset>" followed by
     * a newline.
     *
     * @return string
     */
    public function __toString() {
        return sprintf("%s %s at %s\n", __CLASS__, $this->message, $this->offset);
    }

    /**
     * Returns the offset given at construction.
     *
     * @return int
     */
    public function getErrorOffset() {
        return $this->offset;
    }
}
Documentation generated on Sun, 02 Apr 2006 16:01:05 +0200 by phpDocumentor 1.3.0RC5