* @license PHP License * @package wb * @subpackage Markup */

WBClass::load('WBMarkup_Handler');

/**
 * Markup Scanner Handler: Dereferer
 *
 * Find references in XML.
 *
 * The handler rebuilds a stack of document nodes while the scanner runs.
 * Whenever a namespaced element is closed, a matching dereferencer object
 * is asked for the references contained in that node. The collected ids are
 * grouped by namespace and tag and may be fetched with {@link getIds()} or
 * written to the "reference" table with {@link save()}.
 *
 * Note that {@link onScanStart()} clears the URL, hence {@link setUrl()} has
 * to be called after scanning and before {@link save()}.
 *
 * @version 0.3.0
 * @package wb
 * @subpackage Markup
 */
class WBMarkup_Handler_Dereferer implements WBMarkup_Handler
{
    /**
     * current depth
     * @var int
     */
    private $depth = 0;

    /**
     * complete content as passed to onScanStart()
     * @var string
     */
    private $content = '';

    /**
     * stack of document nodes, indexed by depth
     * @var array
     */
    private $doc = array();

    /**
     * list of running dereferencers, indexed by "Ns_Tag"
     * @var array
     */
    private $dref = array();

    /**
     * list of ids for namespaces and tags: $ids[namespace][tag] = list of ids
     * @var array
     */
    private $ids = array();

    /**
     * URL
     * @var WBDictionary_URL
     */
    private $url;

    /**
     * path
     * @var string
     */
    private $urlPath;

    /**
     * table
     * @var WBDatasource_Table
     */
    private $table;

    /**
     * set url
     *
     * Set referring URL. A WBDictionary_URL object is used as it is. Any
     * other value is wrapped in a new WBDictionary_URL: strings made of
     * digits only are passed to load(), everything else to addWord().
     *
     * @param WBDictionary_URL|string $url URL object, numeric id or word
     * @param string $path
     */
    public function setUrl($url, $path = '')
    {
        $this->urlPath = $path;
        if ($url instanceof WBDictionary_URL) {
            $this->url = $url;
            return;
        }

        $this->url = WBClass::create('WBDictionary_URL');
        if (preg_match('/^\\d+$/', $url)) {
            $this->url->load($url);
            return;
        }
        $this->url->addWord($url);
    }

    /**
     * handler on start beginning of scan
     *
     * Reset all state of a previous scan and create the root node.
     *
     * @param string $content
     * @return bool usually true, false to stop the scanner
     */
    public function onScanStart(&$content)
    {
        $this->url     = null;
        $this->dref    = array();
        // drop ids of a previous scan, otherwise ids of tags that do not
        // show up in this document would leak into the new result
        $this->ids     = array();
        $this->depth   = 0;
        $this->content = $content;
        $this->doc     = array();
        $this->doc[0]  = array(
            'ns'         => '',
            'tag'        => '',
            'attributes' => array(),
            'isEmpty'    => false,
            'cData'      => array(),
        );
        return true;
    }

    /**
     * handler on start element
     *
     * Push a new node on the document stack.
     *
     * @param string $ns The used namespace, if set. otherwise null
     * @param string $tag the tag itself
     * @param array $attributes the attributes of the found element
     * @param bool $isEmpty true if it is a start- and closing-Element (e.g. <br />)
     * @return bool usually true, false to stop the scanner
     */
    public function onStartElement($ns, $tag, $attributes, $isEmpty)
    {
        ++$this->depth;
        $this->doc[$this->depth] = array(
            'ns'         => $ns,
            'tag'        => $tag,
            'attributes' => $attributes,
            'isEmpty'    => $isEmpty,
            'cData'      => array()
        );
        return true;
    }

    /**
     * handler for cData
     *
     * Simply add cData to the current document node.
     *
     * @param string $cData character data
     * @return bool usually true, false to stop the scanner
     */
    public function onCharacterData($cData)
    {
        $this->doc[$this->depth]['cData'][] = $cData;
        return true;
    }

    /**
     * handler for entities
     *
     * Add entities to document's cData
     *
     * @param string $entity the entity element, e.g. nbsp for &nbsp;
     * @param boolean $isUnicode true = the element is a unicode string
     * @return bool usually true, false to stop the scanner
     */
    public function onEntityElement($entity, $isUnicode)
    {
        $cData = '&';
        if ($isUnicode) {
            $cData .= '#';
        }
        $cData .= $entity . ';';

        $this->doc[$this->depth]['cData'][] = $cData;
        return true;
    }

    /**
     * handler on end element
     *
     * Pop the current node from the stack, extract references in case the
     * node is namespaced and append the serialized node to the cData of its
     * parent. The node information is taken from the stack, the parameters
     * are not evaluated.
     *
     * @param string $ns The used namespace, if set. otherwise null
     * @param string $tag the tag itself
     * @param bool $empty defines if the tag is empty
     * @return bool usually true, false to stop the scanner
     */
    public function onEndElement($ns, $tag, $empty)
    {
        // closing element without matching start element: nothing to do
        if (!$this->depth) {
            return true;
        }

        $node = array_pop($this->doc);
        if ($node['ns']) {
            $this->extractReferences($node['ns'], $node['tag'], $node);
        }

        // depth is at least 1 at this point, hence it never drops below 0
        --$this->depth;

        $this->doc[$this->depth]['cData'][] = $this->node2String($node);
        return true;
    }

    /**
     * called right after scan is complete
     *
     * Remove empty id lists and make the remaining lists unique.
     *
     * @return bool usually true, false to stop the scanner
     */
    public function onScanComplete()
    {
        foreach ($this->ids as $ns => &$nIds) {
            foreach ($nIds as $tag => &$ids) {
                if (empty($ids)) {
                    unset($nIds[$tag]);
                    continue;
                }
                // array_unique() preserves keys, re-index to get a gapless list
                $ids = array_values(array_unique($ids));
            }
            unset($ids);

            if (empty($nIds)) {
                unset($this->ids[$ns]);
            }
        }
        unset($nIds);

        return true;
    }

    /**
     * get list of referring ids
     *
     * @return array list of ids, indexed by namespace and tag
     */
    public function getIds()
    {
        return $this->ids;
    }

    /**
     * update reference list
     *
     * Remove old references of the current URL and path and insert new ones.
     * Requires that the URL was set using {@link setUrl()}.
     */
    public function save()
    {
        $this->table = WBClass::create('WBDatasource_Table');

        $clause   = array();
        $clause[] = array(
            'field' => $this->table->getIdentifier('url'),
            'value' => $this->url->getId()
        );
        $clause[] = array(
            'field' => 'path',
            'value' => $this->urlPath
        );
        $this->table->delete('reference', null, null, $clause);

        $proto = array(
            $this->table->getIdentifier('url') => $this->url->getId(),
            'path'       => $this->urlPath,
            'xnamespace' => '',
            'xtag'       => '',
            'xid'        => ''
        );

        $save = array();
        foreach ($this->ids as $ns => $tags) {
            $proto['xnamespace'] = $ns;
            foreach ($tags as $tag => $ids) {
                $proto['xtag'] = $tag;
                // iterate by value: keys of the id list need not be gapless
                foreach ($ids as $id) {
                    $proto['xid'] = $id;
                    $save[]       = $proto;
                }
            }
        }

        // nothing to save
        if (empty($save)) {
            return;
        }
        $this->table->save('reference', '__new', $save);
    }

    /**
     * receive content string
     *
     * Returns the content as it was passed to {@link onScanStart()}.
     *
     * @return string
     */
    public function getParsedContent()
    {
        return $this->content;
    }

    /**
     * extract references from node
     *
     * Ask the matching dereferencer for the references of the node and add
     * them to the id list of the node's namespace and tag.
     *
     * @param string $ns
     * @param string $tag
     * @param array $node
     */
    protected function extractReferences($ns, $tag, &$node)
    {
        $dref = $this->getDereferencer($ns, $tag);

        // Dereferencers are cached case-insensitively, the id list is not.
        // Therefore the list may be missing for e.g. a differently cased tag.
        if (!isset($this->ids[$ns][$tag])) {
            $this->ids[$ns][$tag] = array();
        }

        $this->ids[$ns][$tag] = array_merge($this->ids[$ns][$tag], $dref->getReferences($node));
    }

    /**
     * get instance of matching dereferencer
     *
     * See whether dereferencer is already running, start it otherwise.
     * Tries the class for namespace and tag first, then the class for the
     * namespace and finally falls back to the generic dereferencer.
     *
     * @param string $ns
     * @param string $tag
     * @return WBMarkup_Dereferencer
     */
    protected function getDereferencer($ns, $tag)
    {
        $name = ucfirst(strtolower($ns)) . '_' . ucfirst(strtolower($tag));
        if (isset($this->dref[$name])) {
            return $this->dref[$name];
        }

        // try full qualified class: ns + tag
        try {
            $this->dref[$name] = WBClass::create('WBMarkup_Dereferencer_' . $name);
        } catch (WBException_Class $e) {
            // try just the ns
            // NOTE(review): $ns is used as given here, while the class name
            // above is built from the case-normalized namespace - confirm
            try {
                $this->dref[$name] = WBClass::create('WBMarkup_Dereferencer_' . $ns);
            } catch (WBException_Class $e) {
                // very generic dereferencer
                $this->dref[$name] = WBClass::create('WBMarkup_Dereferencer');
            }
        }

        if (!isset($this->ids[$ns])) {
            $this->ids[$ns] = array();
        }
        $this->ids[$ns][$tag] = array();

        return $this->dref[$name];
    }

    /**
     * serialize document node
     *
     * Nodes without tag (the root node) are serialized as their plain cData.
     *
     * @param array $node
     * @return string serialized node
     */
    protected function node2String($node)
    {
        $cData = implode('', $node['cData']);
        if (!$node['tag']) {
            return $cData;
        }

        $tag = '';
        if ($node['ns']) {
            $tag .= $node['ns'] . ':';
        }
        $tag .= $node['tag'];

        $element = '<' . $tag . $this->attributes2String($node['attributes']);
        if ($node['isEmpty']) {
            $element .= ' />';
            return $element;
        }

        // non-empty elements need their closing tag to stay well-formed
        $element .= '>' . $cData . '</' . $tag . '>';
        return $element;
    }

    /**
     * serialize attribute list to HTML attribute string
     *
     * Attribute values are inserted as they are, no escaping is done here.
     *
     * @param array $att attribute list
     * @return string serialized attributes
     */
    protected function attributes2String($att)
    {
        if (empty($att)) {
            return '';
        }

        $str = '';
        foreach ($att as $k => $v) {
            $str .= ' ' . $k . '="' . $v . '"';
        }
        return $str;
    }
}
?>