Source for file Parse.php

Documentation is available at Parse.php

  1. <?php
  2.  
  3. # Copyright (c) 2002-2005 Cunningham & Cunningham, Inc.
  4. # Released under the terms of the GNU General Public License version 2 or later.
  5. #
  6. # PHP5 translation by Luis A. Floreani <luis.floreani@gmail.com>
  7.  
  /**
   * One node of a parsed HTML table structure.
   *
   * A document is split into a tree of nodes following a list of tag names
   * (by default table / tr / td). Each node keeps the text around its element
   * so that toString() can reassemble the original document:
   *
   *   leader   text before the opening tag
   *   tag      the opening tag, up to and including its '>'
   *   body     text between the opening and closing tag (null or '' once the
   *            body has been parsed into $parts)
   *   end      the closing tag, up to and including its '>'
   *   trailer  text after the closing tag (null once parsed into $more)
   *   parts    first child node (next tag level, or a nested table)
   *   more     next sibling node with the same tag
   */
  class Parse {

      /**
       * @var string
       */
      public $leader;
      public $tag;
      public $body;
      public $end;
      public $trailer;

      /**
       * @var Parse
       */
      public $parts;
      public $more;

      /**
       * Default tag hierarchy used when the constructor receives no $tags.
       *
       * @var array
       */
      public static $tags = array('table', 'tr', 'td');

      /**
       * Parses $text into a node, or builds a node directly in "simple" mode.
       *
       * Parsing mode ($simple !== true): locates the first $tags[$level]
       * element in $text (case-insensitively), splits the text into
       * leader / tag / body / end / trailer, parses the body with the next tag
       * level into $parts, and parses the trailer into $more when another
       * element with the same tag follows. At the last tag level, a body that
       * contains a $tags[0] element is parsed as nested table(s) into $parts
       * and $body is set to ''.
       *
       * Simple mode ($simple === true): the arguments are reinterpreted as
       * $text = tag name, $tags = body, $level = parts, $offset = more.
       *
       * @param string $text   text to parse (tag name in simple mode)
       * @param array  $tags   tag hierarchy; defaults to Parse::$tags (body in simple mode)
       * @param int    $level  index into $tags of the tag to look for (parts in simple mode)
       * @param int    $offset offset of $text in the whole document, reported in
       *                       ParseException (more in simple mode)
       * @param bool   $simple selects simple mode when exactly true
       * @throws ParseException when the opening tag, its '>', the matching
       *                        closing tag or its '>' cannot be found
       */
      public function __construct($text, $tags = null, $level = 0, $offset = 0, $simple = false) {
          if ($simple === true) {
              $this->leader = "\n";
              $this->tag = '<' . $text . '>';
              $this->body = $tags;
              $this->end = '</' . $text . '>';
              $this->trailer = '';
              // With the default arguments these stay 0; every traversal method
              // tests them with a loose "== null", which treats 0 as "no node".
              $this->parts = $level;
              $this->more = $offset;
              return;
          }

          if ($tags == null) {
              $tags = self::$tags;
          }

          $text = (string) $text;
          $tag = $tags[$level];
          $notFound = "Can't find tag: " . $tag;

          // Each search result is checked BEFORE adding 1 to it: "false + 1"
          // is 1, which would hide a failed search from any later check.
          $startTag = stripos($text, '<' . $tag);
          if ($startTag === false) {
              throw new ParseException($notFound, $offset);
          }
          $tagClose = stripos($text, '>', $startTag);
          if ($tagClose === false) {
              throw new ParseException($notFound, $offset);
          }
          $endTag = $tagClose + 1;

          // Balanced search, so that a nested element with the same tag name
          // does not terminate this element early.
          $startEnd = self::findMatchingEndTag($text, $endTag, $tag, $offset);
          $endClose = stripos($text, '>', $startEnd);
          if ($endClose === false) {
              throw new ParseException($notFound, $offset);
          }
          $endEnd = $endClose + 1;
          $startMore = stripos($text, '<' . $tag, $endEnd);

          // The casts normalise the false that substr() returns on older PHP
          // versions when the slice starts at the very end of the string.
          $this->leader = (string) substr($text, 0, $startTag);
          $this->tag = (string) substr($text, $startTag, $endTag - $startTag);
          $this->body = (string) substr($text, $endTag, $startEnd - $endTag);
          $this->end = (string) substr($text, $startEnd, $endEnd - $startEnd);
          $this->trailer = (string) substr($text, $endEnd);

          if ($level + 1 < count($tags)) {
              $this->parts = new Parse($this->body, $tags, $level + 1, $offset + $endTag);
              $this->body = null;
          } else if (stripos($this->body, '<' . $tags[0]) !== false) {
              // Nested table inside the innermost element: keep the parsed
              // tree on this node (it must be stored in the property, since
              // the body text is discarded right after).
              $this->parts = new Parse($this->body, $tags, 0, $offset + $endTag);
              $this->body = '';
          }

          if ($startMore !== false) {
              $this->more = new Parse($this->trailer, $tags, $level, $offset + $endEnd);
              $this->trailer = null;
          }
      }

      /**
       * Finds the closing tag that balances an already opened $tag element.
       *
       * Starting at $matchFromHere (just after the opening tag), every further
       * "<$tag" raises the nesting depth and every "</$tag" lowers it; the
       * position of the "</$tag" that brings the depth back to zero is returned.
       *
       * @param string $text          text being parsed
       * @param int    $matchFromHere position just after the opening tag
       * @param string $tag           tag name
       * @param int    $offset        offset reported in ParseException
       * @return int position of the '<' of the matching closing tag
       * @throws ParseException when no balancing closing tag exists
       */
      private static function findMatchingEndTag($text, $matchFromHere, $tag, $offset) {
          $fromHere = $matchFromHere;
          $depth = 1;
          $startEnd = 0;

          while ($depth > 0) {
              $open = stripos($text, '<' . $tag, $fromHere);
              $close = stripos($text, '</' . $tag, $fromHere);

              // Without a further closing tag the depth can never reach zero.
              if ($close === false) {
                  throw new ParseException("Can't find tag: " . $tag, $offset);
              }

              if ($open !== false && $open < $close) {
                  $depth++;
                  $startEnd = $open;
              } else {
                  $depth--;
                  $startEnd = $close;
              }

              $gt = stripos($text, '>', $startEnd);
              if ($gt === false) {
                  throw new ParseException("Can't find tag: " . $tag, $offset);
              }
              $fromHere = $gt + 1;
          }

          return $startEnd;
      }

      /**
       * Counts this node and all siblings that follow it through $more.
       *
       * @return int
       */
      public function size() {
          $count = 1;
          for ($node = $this->more; $node != null; $node = $node->more) {
              $count++;
          }
          return $count;
      }

      /**
       * Returns the last node of the $more chain starting at this node.
       *
       * @return Parse
       */
      public function last() {
          $node = $this;
          while ($node->more != null) {
              $node = $node->more;
          }
          return $node;
      }

      /**
       * Returns the node reached by following $parts until it is empty.
       *
       * @return Parse
       */
      public function leaf() {
          return ($this->parts == null) ? $this : $this->parts->leaf();
      }

      /**
       * Addresses a node by sibling index and, optionally, child indexes.
       *
       * at($i) returns the $i-th sibling of this node, or the last sibling
       * when the chain is shorter. at($i, $j) returns the $j-th child of that
       * sibling, and at($i, $j, $k) the $k-th child of that child.
       *
       * @param int $i
       * @param int $j
       * @param int $k
       * @return Parse
       */
      public function at($i, $j = null, $k = null) {
          if ($j === null) {
              $node = $this;
              while ($i != 0 && $node->more != null) {
                  $node = $node->more;
                  $i--;
              }
              return $node;
          }
          if ($k === null) {
              return $this->at($i)->parts->at($j);
          }
          return $this->at($i, $j)->parts->at($k);
      }

      /**
       * Returns the body of this node converted to plain text.
       *
       * @return string
       */
      public function text() {
          // The body is null once it has been parsed into $parts.
          return self::htmlToText((string) $this->body);
      }

      /**
       * Converts an HTML fragment to plain text: line breaks are normalised,
       * all other tags removed, whitespace condensed and entities unescaped.
       *
       * @param string $s
       * @return string
       */
      public static function htmlToText($s) {
          $s = self::normalizeLineBreaks((string) $s);
          $s = self::removeNonBreakTags($s);
          $s = self::condenseWhitespace($s);
          $s = self::unescape($s);
          return $s;
      }

      /**
       * Turns normalised "<br />" tags into newlines and unescapes entities
       * and smart quotes.
       *
       * @param string $s
       * @return string
       */
      public static function unescape($s) {
          $s = str_replace('<br />', "\n", $s);
          $s = self::unescapeEntities($s);
          $s = self::unescapeSmartQuotes($s);
          return $s;
      }

      /**
       * Replaces the entities &lt; &gt; &nbsp; &quot; and &amp; by the
       * characters they stand for.
       *
       * @param string $s
       * @return string
       */
      private static function unescapeEntities($s) {
          $s = str_replace('&lt;', '<', $s);
          $s = str_replace('&gt;', '>', $s);
          $s = str_replace('&nbsp;', ' ', $s);
          $s = str_replace('&quot;', '"', $s);
          // &amp; goes last so that "&amp;lt;" becomes "&lt;" and not "<".
          $s = str_replace('&amp;', '&', $s);
          return $s;
      }

      /**
       * Replaces typographic ("smart") quotes by plain ASCII quotes.
       *
       * UTF-8 encoded U+201C / U+201D become '"' and U+2018 / U+2019 become
       * "'". The single bytes 0x93 / 0x94 / 0x91 / 0x92 (the Windows-1252
       * smart quotes) are replaced only when the string is not valid UTF-8,
       * because those byte values also occur inside UTF-8 characters (an en
       * dash, for example, ends in 0x93).
       *
       * NOTE(review): the previous implementation was marked "NOT SURE" and
       * its patterns appear in the listing as '<93>', '<94>', '<91>', '<92>';
       * this is presumed to be a rendering of those Windows-1252 bytes.
       *
       * @param string $s
       * @return string
       */
      public static function unescapeSmartQuotes($s) {
          $s = str_replace(array("\xE2\x80\x9C", "\xE2\x80\x9D"), '"', $s);
          $s = str_replace(array("\xE2\x80\x98", "\xE2\x80\x99"), "'", $s);

          if (!self::isUtf8($s)) {
              $s = str_replace(array("\x93", "\x94"), '"', $s);
              $s = str_replace(array("\x91", "\x92"), "'", $s);
          }
          return $s;
      }

      /**
       * Rewrites every <br> variant, and every "</p><p ...>" paragraph
       * boundary, as the canonical "<br />". Matching is case-insensitive,
       * like the tag lookup in the constructor.
       *
       * @param string $s
       * @return string
       */
      private static function normalizeLineBreaks($s) {
          $s = preg_replace('|<\s*br\s*/?\s*>|si', '<br />', $s);
          $s = preg_replace('|<\s*/\s*p\s*>\s*<\s*p( .*?)?>|si', '<br />', $s);
          return $s;
      }

      /**
       * Collapses each run of whitespace into one space, turns non-breaking
       * spaces (character 160 and the &nbsp; entity) into ordinary spaces and
       * trims spaces, tabs and line ends from both ends.
       *
       * @param string $s
       * @return string
       */
      public static function condenseWhitespace($s) {
          $s = preg_replace('|\s+|s', ' ', $s);

          // Character 160 is the two bytes C2 A0 in UTF-8 and the single byte
          // A0 in single-byte encodings. Replacing the bare byte inside valid
          // UTF-8 would break characters that contain it (e.g. C3 A0).
          if (self::isUtf8($s)) {
              $s = str_replace("\xC2\xA0", ' ', $s);
          } else {
              $s = str_replace(chr(160), ' ', $s);
          }
          $s = str_replace('&nbsp;', ' ', $s);

          return trim($s, " \t\n\r");
      }

      /**
       * Tells whether $s is a valid UTF-8 byte sequence.
       *
       * @param string $s
       * @return bool
       */
      private static function isUtf8($s) {
          // With the "u" modifier PCRE refuses subjects that are not valid UTF-8.
          return preg_match('//u', $s) === 1;
      }

      /**
       * Removes every "<...>" tag except the canonical "<br />".
       *
       * @param string $s
       * @return string
       */
      private static function removeNonBreakTags($s) {
          $i = strpos($s, '<');
          while ($i !== false) {
              $j = strpos($s, '>', $i + 1);
              if ($j === false) {
                  // An unterminated '<' is left in place.
                  break;
              }
              if (substr($s, $i, $j + 1 - $i) !== '<br />') {
                  // Cut the tag out; scanning resumes at the same position.
                  $s = substr($s, 0, $i) . substr($s, $j + 1);
              } else {
                  // Keep the line break and continue after its '<'.
                  $i++;
              }
              $i = strpos($s, '<', $i);
          }
          return $s;
      }

      /**
       * Inserts $text into the opening tag, just before its final character
       * (the closing '>').
       *
       * @param string $text
       */
      public function addToTag($text) {
          $last = strlen($this->tag) - 1;
          $this->tag = substr($this->tag, 0, $last) . $text . '>';
      }

      /**
       * Appends $text to the body.
       *
       * @param string $text
       */
      public function addToBody($text) {
          $this->body = $this->body . $text;
      }

      /**
       * Reassembles the markup of this node, its children and its following
       * siblings.
       *
       * @return string
       */
      public function toString() {
          $out = $this->leader;
          $out .= $this->tag;
          if ($this->parts != null) {
              $out .= $this->parts->toString();
          } else {
              $out .= $this->body;
          }
          $out .= $this->end;
          if ($this->more != null) {
              $out .= $this->more->toString();
          } else {
              $out .= $this->trailer;
          }
          return $out;
      }
  }
  295.  
  /**
   * Exception thrown by Parse when a tag cannot be found. Besides the
   * message it records the offset handed to the constructor.
   */
  class ParseException extends Exception {

      /**
       * Offset supplied when the exception was created.
       *
       * @var int
       */
      protected $offset = 0;

      /**
       * @param string $message description of the failure
       * @param int    $offset  offset to report through getErrorOffset()
       */
      public function __construct($message, $offset) {
          parent::__construct($message);
          $this->offset = $offset;
      }

      /**
       * Renders the exception as "<class name> <message> at <offset>"
       * followed by a newline.
       *
       * @return string
       */
      public function __toString() {
          return __CLASS__ . ' ' . $this->getMessage() . ' at ' . $this->offset . "\n";
      }

      /**
       * @return int the offset supplied to the constructor
       */
      public function getErrorOffset() {
          return $this->offset;
      }
  }
  319.  

Documentation generated on Sun, 02 Apr 2006 16:01:05 +0200 by phpDocumentor 1.3.0RC5