loadFilter($filter, 'TextFilter');
        $n = $filter->getName();
        if (isset($this->preTextFilters[$n])) {
            throw new Exception('Cannot overload pre text filter ' . $filter->getName());
        }
        return $this->preTextFilters[$n] = $filter;
    }

    /**
     * Adds a post-processing text filter to the queue.
     * @note Filters added here are run after the document has been
     *       parsed into a DOM and then serialized back. Suggested use
     *       is for fixing cosmetic issues with the source.
     * @warning Anything done on this step will not have its
     *       well-formedness corrected, so be careful.
     * @param $filter XHTMLCompiler_TextFilter, or a string name that
     *       loadFilter() can resolve
     * @return The registered filter object
     * @throws Exception if a post text filter with the same name is
     *       already registered
     */
    public function addPostTextFilter($filter) {
        $filter = $this->loadFilter($filter, 'TextFilter');
        $n = $filter->getName();
        if (isset($this->postTextFilters[$n])) {
            throw new Exception('Cannot overload post text filter ' . $filter->getName());
        }
        return $this->postTextFilters[$n] = $filter;
    }

    /**
     * Adds a DOM-processing filter to the queue and reserves the xc:
     * attributes it declares.
     * @param $filter XHTMLCompiler_DOMFilter, or a string name that
     *       loadFilter() can resolve
     * @return The registered filter object
     * @throws Exception if a DOM filter with the same name is already
     *       registered, or if one of its xc: attributes has already
     *       been claimed by another filter
     */
    public function addDOMFilter($filter) {
        $filter = $this->loadFilter($filter, 'DOMFilter');
        $n = $filter->getName();
        if (isset($this->DOMFilters[$n])) {
            throw new Exception('Cannot overload DOM filter ' . $filter->getName());
        }
        // each xc: attribute may be defined by one filter only
        $attributes = $filter->getXCAttributesDefined();
        foreach ($attributes as $attribute) {
            if (isset($this->xcAttr[$attribute])) {
                throw new Exception('Duplicate attribute definition for xc:' . $attribute);
            }
            $this->xcAttr[$attribute] = true;
        }
        return $this->DOMFilters[$n] = $filter;
    }

    /**
     * If filter is string, load the filter based on a few guesses:
     * first the class XHTMLCompiler_{$subclass}_{$filter}, then a class
     * literally named $filter, and finally the file
     * "$subclass/$filter.php", which is expected to define the first
     * class name. Non-string values are returned untouched.
     * @param $filter String or object filter
     * @param $subclass String filter family, e.g. 'TextFilter' or 'DOMFilter'
     * @return Filter object
     */
    protected function loadFilter($filter, $subclass) {
        if (is_string($filter)) {
            $class = "XHTMLCompiler_{$subclass}_$filter";
            if (class_exists($class)) {
                $filter = new $class;
            } elseif (class_exists($filter)) {
                $filter = new $filter;
            } else {
                require "$subclass/$filter.php";
                $filter = new $class;
            }
        }
        return $filter;
    }

    /** Returns the dependency array accumulated from the filter run */
    public function getDeps() {
        return $this->deps;
    }

    /**
     * Adds a file to the dependency list, keyed by filename with the
     * result of filemtime() as value.
     * @param $filename String path of the file depended upon
     */
    public function addDependency($filename) {
        $this->deps[$filename] = filemtime($filename);
    }

    /**
     * Accepts a page's text (usually XHTML) and processes it: runs the
     * pre text filters, parses the result into a DOM, runs the DOM
     * filters, serializes back to text, runs the post text filters and
     * finally validates the output.
     * @param $text String text to be processed
     * @param $page XHTMLCompiler_Page representing currently processed page
     * @return String processed text; if validation failed or errors were
     *         collected, the text includes a warning block listing them
     */
    public function process($text, $page) {

        // do pre-text processing
        foreach ($this->preTextFilters as $filter) {
            $text = $filter->process($text, $page, $this);
        }

        // generate the DOM
        $this->setupXMLCatalog();
        $dom = $this->createDOM($text);
        $this->analyzeInternalSubset($dom);

        // validate the document to force the entities to be resolved,
        // we don't actually care about the errors
        set_error_handler(array($this, 'muteErrorHandler'));
        $dom->validate();
        restore_error_handler();

        // dependencies must be collected before the includes are expanded
        $this->analyzeXIncludes($dom);
        $dom->xinclude();

        // run DOM filters
        foreach ($this->DOMFilters as $filter) {
            $filter->setup($dom);
            $filter->process($dom, $page, $this);
        }

        // translate back to text
        $text = $dom->saveXML();

        // remove all non-default namespace declarations, may change,
        // but for now embedded XML namespaces are not cross-browser friendly
        $text = preg_replace('/ xmlns:.+?=".+?"/', '', $text);

        // scrub out custom DTD additions (the internal subset in brackets)
        // NOTE(review): the markup inside this pattern had been stripped
        // from the source; it is reconstructed here as a DOCTYPE match.
        // Confirm against the upstream file.
        $text = preg_replace('/(<!DOCTYPE[^>]*?) ?\[[^\]]+\]/', '\1', $text);

        foreach ($this->postTextFilters as $filter) {
            $text = $filter->process($text, $page, $this);
        }

        // replace all CDATA sections with their escaped contents
        // NOTE(review): pattern reconstructed (it had been reduced to an
        // empty regex with no capture group, while cdataCallback() reads
        // $matches[1]). Confirm against the upstream file.
        $text = preg_replace_callback(
            '/<!\[CDATA\[(.*?)\]\]>/s',
            array('XHTMLCompiler_FilterManager', 'cdataCallback'),
            $text
        );

        // okay, now finally do validation, and collect the parse and
        // validation errors so they can be reported in the document
        set_error_handler(array($this, 'validationErrorHandler'));
        $dom->loadXML($text);
        $status = $dom->validate();
        restore_error_handler();

        if (!$status || !empty($this->errors)) {
            $this->buildErrors($dom);
            $text = $dom->saveXML();
        }

        return $text;
    }

    /**
     * preg_replace_callback() handler that replaces a matched CDATA
     * section with its contents, escaped as character data.
     * @param $matches Array of regex matches, index 1 is the CDATA body
     * @return String escaped text (quotes are left alone)
     */
    public static function cdataCallback($matches) {
        return htmlspecialchars($matches[1], ENT_NOQUOTES, 'UTF-8');
    }

    /**
     * Temporary error handler to use when validating a document;
     * records the message instead of printing it.
     * @param $n Integer error level (unused)
     * @param $text String error message
     */
    public function validationErrorHandler($n, $text) {
        $this->errors[] = $text;
    }

    /**
     * Handler that mutes all errors
     */
    public function muteErrorHandler($n, $t) {}

    /**
     * Sets up an XML catalog to speed up entity resolution by pointing
     * the XML_CATALOG_FILES environment variable at ../catalog/catalog.xml
     * relative to this file.
     */
    public function setupXMLCatalog() {
        // spaces are percent-encoded and backslashes turned into slashes
        $catalog = str_replace(
            array(' ', '\\'),
            array('%20', '/'),
            dirname(__FILE__)
        ) . '/../catalog/catalog.xml';
        if ($catalog[1] == ':') $catalog = substr($catalog, 2); // remove drive
        putenv('XML_CATALOG_FILES=' . $catalog);
    }

    /**
     * Creates a reasonable well default configured DOM
     * @param string $text XML to load DOM with, or false for an empty document
     * @return DOMDocument
     */
    public function createDOM($text = false) {
        $dom = new DOMDocument();
        $dom->preserveWhiteSpace = false;
        $dom->formatOutput = true;
        $dom->resolveExternals = true;
        // todo: somehow, collect information on which entity files
        // are being added to the document, and add to xc-deps.
        $dom->substituteEntities = true; // allows for custom entities too!
        if ($text !== false) $dom->loadXML($text);
        return $dom;
    }

    /**
     * Analyzes the internal subset of a DOM, registering any file
     * entity definitions as dependencies
     * @param DOMDocument $dom to inspect; documents without a doctype
     *        are ignored
     */
    public function analyzeInternalSubset($dom) {
        if (empty($dom->doctype) || !is_object($dom->doctype)) return;
        $internal_subset = $dom->doctype->internalSubset;
        if ($internal_subset) {
            // there are some entities that need to be registered to
            // the dependency list. Match the ones declared with a SYSTEM
            // identifier and capture the (single or double quoted)
            // system literal; the branch-reset group keeps the filename
            // in group 1 for both quote styles.
            // NOTE(review): pattern reconstructed (it had been reduced to
            // an empty regex, while the loop below reads $matches[1]).
            // Confirm against the upstream file.
            preg_match_all(
                '/<!ENTITY\s+(?:%\s+)?\S+\s+SYSTEM\s+(?|"([^"]*)"|\'([^\']*)\')\s*>/s',
                $internal_subset,
                $matches
            );
            foreach ($matches[1] as $filename) {
                // $filename will always be relative to web root, so
                // no munging necessary
                $this->addDependency($filename);
            }
        }
    }

    /**
     * Analyzes a documents XIncludes and registers necessary dependencies.
     * Make sure you call this before calling $dom->xinclude
     * @param DOMDocument $dom to process
     * @todo Factor into a DOMFilter
     * @todo Handle arbitrary nestings of includes
     */
    public function analyzeXIncludes($dom) {
        $xpath = new DOMXPath($dom);
        $xpath->registerNamespace('xi', 'http://www.w3.org/2001/XInclude');
        $nodes = $xpath->query('//xi:include');
        foreach ($nodes as $node) {
            if (! $node instanceof DOMElement) continue;
            if (! $filename = $node->getAttribute('href')) continue;
            $this->addDependency($filename);
            // the included file may itself pull in entity files
            $sub_dom = new DOMDocument();
            $sub_dom->load($filename);
            $this->analyzeInternalSubset($sub_dom);
        }
    }

    /**
     * Adds validation errors to the output document as a message,
     * inserted as the first child of the body element. A body (and, if
     * the document is empty, a root html element) is created when
     * missing.
     * @param DOMDocument $dom to add the warning block to
     */
    public function buildErrors($dom) {
        $body = $dom->getElementsByTagName('body')->item(0);
        if (!$body) {
            // a document can only have one root element, so reuse the
            // existing one when there is one
            $root = $dom->documentElement;
            if (!$root) {
                $root = $dom->appendChild($dom->createElement('html'));
            }
            $body = $root->appendChild($dom->createElement('body'));
        }
        $warning = $dom->createElement('div');
        $warning->setAttribute('class', 'warning');
        $warning->appendChild($dom->createElement('h2', 'Warning: Errors'));
        $warning->appendChild($dom->createElement('p', 'This document has validation errors:'));
        $list = $dom->createElement('ul');
        foreach ($this->errors as $error) {
            // strip-tags removes HTML tags to make the plaintext output
            // more friendly, IS NOT for security reasons
            $item = $dom->createElement('li');
            // createElement() does not escape its value argument, so the
            // message is added as a text node to survive '&' and '<'
            $item->appendChild($dom->createTextNode(strip_tags($error)));
            $list->appendChild($item);
        }
        $warning->appendChild($list);
        // firstChild is null for an empty body, which makes this an append
        $body->insertBefore($warning, $body->firstChild);
    }

}

?>