* @license PHP License
* @package wb
* @subpackage Markup
*/
WBClass::load('WBMarkup_Handler_Xml2Wxml');
/**
* Markup Scanner Handler: Wxml2Xml
*
* Convert Wxml html data to Wombat XML
*
* @version 0.2.1
* @package wb
* @subpackage Markup
*/
class WBMarkup_Handler_Wxml2Xml extends WBMarkup_Handler_Xml2Wxml
{
    /**
     * Constructor
     *
     * Flips the inherited standard tag map, so the values of the parent's
     * map become the lookup keys used by onStartElement().
     *
     * NOTE(review): no parent constructor is invoked here - confirm that
     * WBMarkup_Handler_Xml2Wxml does not rely on one.
     */
    public function __construct()
    {
        $this->stdTagMap = array_flip($this->stdTagMap);
    }

    /**
     * Handler on start element
     *
     * For elements without namespace, the tag is translated through the
     * flipped standard tag map. An attribute "wb:dialog" takes precedence:
     * its value becomes the tag and the namespace is set to "wb".
     *
     * @param string $ns The used namespace, if set. otherwise null
     * @param string $tag the tag itself
     * @param array $attributes the attributes of the found element
     * @param bool $isEmpty true if it is a start- and closing-Element (e.g. <br />)
     * @return bool usually true, false to stop the scanner
     */
    public function onStartElement($ns, $tag, $attributes, $isEmpty)
    {
        if (empty($ns)) {
            if (isset($this->stdTagMap[$tag])) {
                $tag = $this->stdTagMap[$tag];
            }
            if (isset($attributes['wb:dialog'])) {
                $ns  = 'wb';
                $tag = $attributes['wb:dialog'];
            }
        }

        return parent::onStartElement($ns, $tag, $attributes, $isEmpty);
    }

    /**
     * Handler for entities
     *
     * Replace non-breaking spaces with a plain space character. Any other
     * entity is passed on to the parent's method.
     *
     * @param string $entity the entity element, e.g. nbsp for &nbsp;
     * @param boolean $isUnicode true = the element is a unicode string
     * @return bool true for nbsp, otherwise whatever the parent returns
     */
    public function onEntityElement($entity, $isUnicode)
    {
        if ('nbsp' === $entity) {
            $this->doc[$this->depth]['cData'][] = ' ';
            return true;
        }

        return parent::onEntityElement($entity, $isUnicode);
    }

    /**
     * Called right after scan is complete
     *
     * Replace whitespace-only cData parts of the root node with empty
     * strings. Call parent's method.
     *
     * @return bool usually true, false to stop the scanner
     */
    public function onScanComplete()
    {
        // guard: iterating a missing key by reference would create it and raise a warning
        if (isset($this->doc[0]['cData']) && is_array($this->doc[0]['cData'])) {
            foreach ($this->doc[0]['cData'] as &$chunk) {
                if ('' === trim($chunk)) {
                    $chunk = '';
                }
            }
            // break the reference, otherwise a later write to $chunk would hit the last element
            unset($chunk);
        }

        return parent::onScanComplete();
    }

    /**
     * Convert node
     *
     * Hand the node to the converter responsible for namespace and tag.
     * If there is no converter, namespace and tag of the node are reset
     * (ns = null, tag = false).
     *
     * @todo convert empty namespace (standard HTML) to known WB namespace, if possible
     * @param string $ns
     * @param string $tag
     * @param array $node modified in place
     * @return mixed result of the converter's toXml(), null if there is no converter
     */
    protected function convert($ns, $tag, &$node)
    {
        // getConverter() is not defined in this class - presumably inherited
        $con = $this->getConverter($ns, $tag);
        if (!$con) {
            $node['ns']  = null;
            $node['tag'] = false;
            return;
        }

        return $con->toXml($node);
    }

    /**
     * Serialize Document Node
     *
     * Clean up rubbish from WYSIWYG editor.
     * Strip nodes with whitespace content only.
     * Remove trailing BR-tags
     *
     * Empty elements and namespaced elements are serialized by the parent
     * without any cleanup.
     *
     * @param array $node
     * @return string serialized node, empty string if nothing but whitespace is left
     */
    protected function node2String($node)
    {
        if ($node['isEmpty'] || !empty($node['ns'])) {
            return parent::node2String($node);
        }

        $this->stripBadTag($node);
        $this->stripBadAttributes($node);

        // drop every trailing whitespace-only chunk; each candidate is trimmed,
        // so a chunk like " " no longer stops the cleanup half way
        while (!empty($node['cData']) && '' === trim(end($node['cData']))) {
            array_pop($node['cData']);
        }

        // strip BR-tag before closing tag
        // NOTE(review): the literal compared here used to contain just a line
        // break, which looks like a BR-tag lost in transit - confirm that child
        // elements are serialized as "<br />" (or similar) in cData.
        // NOTE(review): stripBadTag() has already blanked div, td and th at this
        // point, hence only "p" can match. Order kept to preserve the output.
        $brTagRm = array('p', 'td', 'th', 'div');
        if (in_array($node['tag'], $brTagRm, true) && !empty($node['cData'])) {
            $last = trim(end($node['cData']));
            if (1 === preg_match('#^<br\s*/?>$#i', $last)) {
                array_pop($node['cData']);
            }
        }

        // empty cData?
        $content = implode("\n", array_map('trim', $node['cData']));
        if ('' === $content) {
            return '';
        }

        // force paragraph: untagged content directly below the root gets wrapped
        if (1 == $node['depth'] && empty($node['tag'])) {
            $node['tag']     = 'p';
            $node['isEmpty'] = false;
        }

        return parent::node2String($node);
    }

    /**
     * Remove unwanted tags
     *
     * Layout tags (font, span, div and everything table related) lose tag,
     * namespace and attributes; the content of the node is left alone.
     * The tag name is compared case-insensitively.
     *
     * @param array $node modified in place
     */
    private function stripBadTag(&$node)
    {
        $unwanted = array(
            'font', 'span', 'div',
            'table', 'tbody', 'thead', 'tfoot', 'tr', 'td', 'th'
        );

        if (!in_array(strtolower((string) $node['tag']), $unwanted, true)) {
            return;
        }

        $node['tag']        = '';
        $node['ns']         = '';
        $node['attributes'] = array();
    }

    /**
     * Remove unwanted attributes
     *
     * Remove all attributes besides:
     * - class
     * - href (a)
     * - title (a)
     *
     * The tag name is compared case-insensitively, just like in stripBadTag().
     * Attributes that are kept end up in the order listed above.
     *
     * @param array $node modified in place
     */
    private function stripBadAttributes(&$node)
    {
        if (empty($node['attributes'])) {
            return;
        }

        $allowed = array('class');
        if ('a' === strtolower((string) $node['tag'])) {
            $allowed[] = 'href';
            $allowed[] = 'title';
        }

        $atts = array();
        foreach ($allowed as $name) {
            if (isset($node['attributes'][$name])) {
                $atts[$name] = $node['attributes'][$name];
            }
        }
        $node['attributes'] = $atts;
    }
}