Source for file Parse.php

Documentation is available at Parse.php

  1. <?php
  2.  
  3. # Copyright (c) 2002-2005 Cunningham & Cunningham, Inc.
  4. # Released under the terms of the GNU General Public License version 2 or later.
  5. #
  6. # PHP5 translation by Luis A. Floreani <luis.floreani@gmail.com>
  7.  
  8. /**
  9. * Load exception class: Parse
  10. */
  11. require_once 'PHPFIT/Exception/Parse.php';
  12.  
  13. class PHPFIT_Parse {
  14.  
  15. /**
  16. * @var string
  17. */
  18. public $leader;
  19. public $tag;
  20. public $body;
  21. public $end;
  22. public $trailer;
  23. public $count = 0;
  24.  
  25. /**
  26. * @var Parse
  27. */
  28. public $parts;
  29. public $more;
  30.  
  31. /**
  32. * @var array
  33. */
  34. public static $tags = array( 'table', 'tr', 'td' );
  35. /**
  36. * Keep in mind how often a tag was called
  37. *
  38. * Consider that "td" will be reseted if a "tr" starts. Also "tr" will be
  39. * reseted on start of "table"
  40. */
  41. private static $tagCount = array(
  42. 'table' => 0,
  43. 'tr' => 0,
  44. 'td' => 0,
  45. );
  46. /**
  47. * @param string text
  48. * @param array tags
  49. * @param int level
  50. * @param int offset
  51. */
  52. public function __construct( $text, $tags = null, $level = 0, $offset = 0, $simple = false ) {
  53. if( $simple === true ) {
  54. $this->leader = "\n";
  55. $this->tag = "<".$text.">";
  56. $this->body = $tags;
  57. $this->end = "</".$text.">";
  58. $this->trailer = "";
  59. $this->parts = $level;
  60. $this->more = $offset;
  61. return;
  62. }
  63. if( $tags == null ) {
  64. $tags = PHPFIT_Parse::$tags;
  65. }
  66. $startTag = stripos( $text, '<' . $tags[$level] );
  67. $endTag = stripos( $text, '>', $startTag ) + 1;
  68. $startEnd = stripos( $text, '</' . $tags[$level], $endTag );
  69. $endEnd = stripos( $text, '>', $startEnd ) + 1;
  70. $startMore = stripos( $text, '<'.$tags[$level], $endEnd );
  71. if( $startTag === false || $endTag === false || $startEnd === false || $endEnd === false ) {
  72. throw new PHPFIT_Exception_Parse( 'Can\'t find tag: ' . $tags[$level], $offset );
  73. }
  74. $this->leader = substr( $text, 0, $startTag );
  75. $this->tag = substr( $text, $startTag, $endTag - $startTag );
  76. $this->body = substr( $text, $endTag, $startEnd - $endTag );
  77. $this->end = substr( $text, $startEnd, $endEnd - $startEnd );
  78. $this->trailer = substr( $text, $endEnd );
  79. // add counter
  80. if( isset( self::$tagCount[$tags[$level]] ) ) {
  81. $this->count = self::$tagCount[$tags[$level]]++;
  82. switch( $tags[$level] ) {
  83. case 'table':
  84. self::$tagCount['tr'] = 0;
  85. // fall through!
  86. case 'tr':
  87. self::$tagCount['td'] = 0;
  88. break;
  89. default:
  90. break;
  91. }
  92. }
  93. // we are not at cell-level - dig further down
  94. if( ( $level + 1 ) < count( $tags ) ) {
  95. $this->parts = new PHPFIT_Parse( $this->body, $tags, $level+1, $offset + $endTag );
  96. $this->body = null;
  97. }
  98. else {
  99. $index = stripos( $this->body, '<'.$tags[0] );
  100. if( $index !== false ) {
  101. $parts = new PHPFIT_Parse( $this->body, $tags, 0, $offset + $endTag );
  102. $this->body = '';
  103. }
  104. }
  105. if( $startMore !== false ) {
  106. $this->more = new PHPFIT_Parse( $this->trailer, $tags, $level, $offset + $endEnd );
  107. $this->trailer = null;
  108. }
  109. }
  110.  
  111. /**
  112. * @return int
  113. */
  114.  
  115. public function size() {
  116. return ($this->more==null) ? 1 : $this->more->size()+1;
  117. }
  118.  
  119.  
  120. /**
  121. * @return Parse
  122. */
  123.  
  124. public function last() {
  125. return ($this->more==null) ? $this : $this->more->last();
  126. }
  127.  
  128. /**
  129. * @return Parse
  130. */
  131.  
  132. public function leaf() {
  133. return ($this->parts==null) ? $this : $this->parts->leaf();
  134. }
  135.  
  136.  
  137.  
  138. /**
  139. * @param int i
  140. * @param int j
  141. * @param int k
  142. * @return Parse
  143. */
  144.  
  145. public function at($i, $j = null, $k = null) {
  146. if ($j === null) {
  147. return ($i == 0 || $this->more == null) ? $this : $this->more->at($i-1);
  148. } else if ($k === null)
  149. return $this->at($i)->parts->at($j);
  150. else
  151. return $this->at($i, $j)->parts->at($k);
  152. }
  153.  
  154.  
  155. /**
  156. * @return string
  157. */
  158.  
  159. public function text() {
  160. return PHPFIT_Parse::htmlToText($this->body);
  161. }
  162.  
  163.  
  164. /**
  165. * @param string
  166. * @return string
  167. */
  168.  
  169. public static function htmlToText($s) {
  170. $s = PHPFIT_Parse::normalizeLineBreaks($s);
  171. $s = PHPFIT_Parse::removeNonBreakTags($s);
  172. $s = PHPFIT_Parse::condenseWhitespace($s);
  173. $s = PHPFIT_Parse::unescape($s);
  174. return $s;
  175. }
  176.  
  177. /**
  178. * @param string
  179. * @return string
  180. */
  181.  
  182. public static function unescape($s) {
  183. $s = str_replace("<br />", "\n", $s);
  184. $s = PHPFIT_Parse::unescapeEntities($s);
  185. $s = PHPFIT_Parse::unescapeSmartQuotes($s);
  186. return $s;
  187. }
  188.  
  189.  
  190. /**
  191. * @param string
  192. * @return string
  193. */
  194.  
  195. private static function unescapeEntities($s) {
  196. $s = str_replace('&lt;', '<', $s);
  197. $s = str_replace('&gt;', '>', $s);
  198. $s = str_replace('&nbsp;', ' ', $s);
  199. $s = str_replace('&quot;', '\"', $s);
  200. $s = str_replace('&amp;', '&', $s);
  201. return $s;
  202. }
  203.  
  204.  
  205. /**
  206. * @param string
  207. * @return string
  208. */
  209. public static function unescapeSmartQuotes($s) {
  210. /* NOT SURE */
  211. $s = ereg_replace('<93>', '"', $s);
  212. $s = ereg_replace('<94>', '"', $s);
  213. $s = ereg_replace('<91>', "'", $s);
  214. $s = ereg_replace('<92>', "'", $s);
  215.  
  216. /* NO SUPPORT FOR UNICODE IN PHP! :( */
  217. /*
  218. $s = ereg_replace('\u201c', '"', $s);
  219. $s = ereg_replace('\u201d', '"', $s);
  220. $s = ereg_replace('\u2018', '\'', $s);
  221. $s = ereg_replace('\u2019', '\'', $s);
  222. */
  223. return $s;
  224. }
  225.  
  226. /**
  227. * @param string
  228. * @return string
  229. */
  230.  
  231. private static function normalizeLineBreaks($s) {
  232. $s = preg_replace('|<\s*br\s*/?\s*>|s', '<br />', $s);
  233. $s = preg_replace('|<\s*/\s*p\s*>\s*<\s*p( .*?)?>|s', '<br />', $s);
  234. return $s;
  235. }
  236.  
  237. /**
  238. * @param string
  239. * @return string
  240. */
  241.  
  242. public static function condenseWhitespace($s) {
  243. $NON_BREAKING_SPACE = chr(160);
  244.  
  245. $s = preg_replace('|\s+|s', ' ', $s);
  246. $s = ereg_replace($NON_BREAKING_SPACE, ' ', $s);
  247. $s = ereg_replace('&nbsp;', ' ', $s);
  248.  
  249. $s = trim($s, "\t\n\r\ "); // GUARDA! PUEDE QUE FALLE!
  250. //$s = trim($s, "\t.\n.\r.\0.\x0B.\ "); // GUARDA! PUEDE QUE FALLE!
  251. return $s;
  252. }
  253.  
  254.  
  255. /**
  256. * @param string
  257. * @return string
  258. */
  259.  
  260. private static function removeNonBreakTags($s) {
  261. $i=0;
  262. $i = strpos($s,'<',$i);
  263. while ($i !== false) {
  264. $j = strpos($s,'>',$i+1);
  265. if ($j>0) {
  266. if (substr($s, $i, $j+1-$i) != '<br />') {
  267. $s = substr($s, 0, $i) . substr($s, $j+1);
  268. } else {
  269. $i++;
  270. }
  271. } else {
  272. break;
  273. }
  274. $i = strpos($s,'<',$i);
  275. }
  276. return $s;
  277. }
  278.  
  279.  
  280. /**
  281. * @param string text
  282. */
  283.  
  284. public function addToTag($text) {
  285. $last = strlen($this->tag)-1;
  286. $this->tag = substr($this->tag, 0, $last) . $text . '>';
  287. }
  288.  
  289.  
  290. /**
  291. * @param string text
  292. */
  293.  
  294. public function addToBody($text) {
  295. $this->body = $this->body . $text;
  296. }
  297.  
  298. /**
  299. * @return string
  300. */
  301. public function toString() {
  302. $out = $this->leader;
  303. $out .= $this->tag;
  304. if ($this->parts != null) {
  305. $out .= $this->parts->toString();
  306. } else {
  307. $out .= $this->body;
  308. }
  309. $out .= $this->end;
  310. if ($this->more != null) {
  311. $out .= $this->more->toString();
  312. } else {
  313. $out .= $this->trailer;
  314. }
  315. return $out;
  316. }
  317. }
  318.  
  319. ?>

Documentation generated on Sun, 02 Apr 2006 23:20:52 +0000 by phpDocumentor 1.3.0RC5