From 4852c2a5f5c2c96723699701eb1ef19b54cf96eb Mon Sep 17 00:00:00 2001
From: "C. Scott Ananian"
Date: Fri, 2 Jul 2021 01:15:01 -0400
Subject: [PATCH] Implement XML parser; fixes to XMLSerializer and CDATASection
support
Bug: w3c/DOM-Parsing#29
Bug: w3c/DOM-Parsing#38
Bug: w3c/DOM-Parsing#47
Bug: w3c/DOM-Parsing#48
Bug: w3c/DOM-Parsing#50
Bug: w3c/DOM-Parsing#52
Bug: w3c/DOM-Parsing#59
Bug: w3c/DOM-Parsing#71
Change-Id: I76735c4be1d9738c690417207301f737e3a3c9ff
---
src/CDATASection.php | 19 +++
src/DOMImplementation.php | 3 +-
src/DOMParser.php | 133 +++++++++++++++++-
src/Document.php | 23 ++-
src/DocumentType.php | 1 +
src/Internal/NamespacePrefixMap.php | 4 -
src/Internal/WhatWG.php | 48 +++++--
src/NamedNodeMap.php | 2 +-
src/Window.php | 2 +-
src/XMLDocument.php | 3 +-
tests/DodoTest.php | 2 +-
...OMImplementationCreateHTMLDocumentTest.php | 1 +
.../WPT/Dom/Nodes/DocumentTypeLiteralTest.php | 1 +
.../DOMParserParseFromStringXmlTest.php | 11 +-
.../XMLSerializerSerializeToStringTest.php | 101 ++++++-------
tests/logs/errors.yaml | 36 ++---
tests/logs/failures.yml | 12 +-
tools/TestsGenerator/ParserTask.php | 16 ++-
18 files changed, 307 insertions(+), 111 deletions(-)
diff --git a/src/CDATASection.php b/src/CDATASection.php
index bcfebbc..b5473e5 100644
--- a/src/CDATASection.php
+++ b/src/CDATASection.php
@@ -4,6 +4,8 @@
namespace Wikimedia\Dodo;
+use Wikimedia\Dodo\Internal\BadXMLException;
+use Wikimedia\Dodo\Internal\NamespacePrefixMap;
use Wikimedia\Dodo\Internal\UnimplementedTrait;
class CDATASection extends Text implements \Wikimedia\IDLeDOM\CDATASection {
@@ -27,4 +29,21 @@ final public function getNodeType() : int {
public function getNodeName() : string {
return "#cdata-section";
}
+
+ /** @inheritDoc */
+ public function _xmlSerialize(
+ ?string $namespace, NamespacePrefixMap $prefixMap, int &$prefixIndex,
+ bool $requireWellFormed, array &$markup
+ ) : void {
+ // See https://github.com/w3c/DOM-Parsing/issues/38
+ $data = $this->getData();
+ // Data containing the section terminator cannot be written as a
+ // single well-formed CDATA section, so refuse it when the caller
+ // asked for well-formed output.
+ if ( $requireWellFormed && strpos( $data, ']]>' ) !== false ) {
+ throw new BadXMLException();
+ }
+ // Opening delimiter, the data exactly as stored (no escaping), and
+ // the closing delimiter are appended as separate markup fragments.
+ $markup[] = '<![CDATA[';
+ $markup[] = $data;
+ $markup[] = ']]>';
+ }
}
diff --git a/src/DOMImplementation.php b/src/DOMImplementation.php
index 8d24099..6de8036 100644
--- a/src/DOMImplementation.php
+++ b/src/DOMImplementation.php
@@ -76,7 +76,6 @@ public function createDocumentType( $qualifiedName, $publicId, $systemId ) {
$qualifiedName,
$publicId,
$systemId );
- /* TEMPORARY STUB */
}
/**
@@ -147,7 +146,7 @@ public function createDocument( ?string $namespace, ?string $qualifiedName = '',
/** @inheritDoc */
public function createHTMLDocument( ?string $titleText = null ) {
- $d = new Document( $this->_contextObject, 'html', null );
+ $d = new Document( $this->_contextObject, 'html', 'text/html', null );
$d->appendChild( new DocumentType( $d, "html" ) );
diff --git a/src/DOMParser.php b/src/DOMParser.php
index dc6c307..4053ff8 100644
--- a/src/DOMParser.php
+++ b/src/DOMParser.php
@@ -4,12 +4,14 @@
namespace Wikimedia\Dodo;
+use Exception;
use RemexHtml\DOM\DOMBuilder;
+use RemexHtml\Tokenizer\NullTokenHandler;
use RemexHtml\Tokenizer\Tokenizer;
use RemexHtml\TreeBuilder\Dispatcher;
use RemexHtml\TreeBuilder\TreeBuilder;
-use Wikimedia\Dodo\Internal\UnimplementedException;
use Wikimedia\IDLeDOM\DOMParserSupportedType;
+use XMLReader;
/**
* DOMParser
@@ -24,9 +26,23 @@ class DOMParser implements \Wikimedia\IDLeDOM\DOMParser {
*/
public function parseFromString( string $string, /* DOMParserSupportedType */ string $type ) {
$type = DOMParserSupportedType::cast( $type );
- if ( $type !== DOMParserSupportedType::text_html ) {
- throw new UnimplementedException( __METHOD__ . "( '$type' )" );
+ // text/html goes to the HTML parser; every other supported type is
+ // handed to the XML parser together with its content type.
+ switch ( $type ) {
+ case DOMParserSupportedType::text_html:
+ return $this->_parseHtml( $string );
+ default:
+ // XXX if we throw an XML well-formedness error here, we're
+ // supposed to make a document describing it, instead of
+ // throwing an exception.
+ return $this->_parseXML( $string, $type );
}
+ }
+
+ /**
+ * Create an HTML parser, parsing the string as UTF-8.
+ * @param string $string
+ * @return Document
+ */
+ private function _parseHtml( string $string ) {
$domBuilder = new class( [
'suppressHtmlNamespace' => true,
'suppressIdAttribute' => true,
@@ -42,7 +58,7 @@ protected function createDocument(
string $system = null
) {
// Force this to be an HTML document (not an XML document)
- $this->doc = new Document( null, 'html' );
+ $this->doc = new Document( null, 'html', 'text/html' );
return $this->doc;
}
@@ -76,4 +92,113 @@ public function doctype( $name, $public, $system, $quirks, $sourceStart, $source
return $result;
}
+ /**
+ * An XML parser ... is a construct that follows the rules given in
+ * XML to map a string of bytes or characters into a Document
+ * object.
+ *
+ * The spec then follows that up with:
+ * "Note: At the time of writing, no such rules actually exist."
+ *
+ * Use the enabled-by-default PHP XMLReader class to do our
+ * parsing and cram it into a Document somehow, and hope we don't
+ * mangle things too badly.
+ *
+ * Elements, attributes, text (including whitespace-only text), CDATA
+ * sections, comments, processing instructions and the doctype are
+ * copied into the new Document. Any other node type reported by the
+ * reader raises an Exception.
+ *
+ * NOTE(review): libxml errors and warnings are suppressed by the flags
+ * below, so input that is not well-formed presumably yields whatever
+ * was built before the reader stopped -- confirm against callers.
+ *
+ * @see https://html.spec.whatwg.org/multipage/xhtml.html#xml-parser
+ *
+ * @param string $s The string to parse
+ * @param string $contentType
+ * @return Document
+ * @throws Exception if the reader reports an unsupported node type
+ */
+ private function _parseXML( string $s, string $contentType ) {
+ $reader = new XMLReader();
+ $reader->XML(
+ $s, 'utf-8',
+ LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_PARSEHUGE
+ );
+ # According to spec, this is a Document not an XMLDocument
+ $doc = new Document( null, 'xml', $contentType );
+ // $node is the current insertion parent; $attrNode is the element
+ // most recently created, which receives the attributes that follow.
+ $node = $doc;
+ $attrNode = null;
+ try {
+ // Visit any pending attributes of the current node before
+ // advancing the reader to the next node.
+ while ( $reader->moveToNextAttribute() || $reader->read() ) {
+ switch ( $reader->nodeType ) {
+ case XMLReader::END_ELEMENT:
+ $node = $node->getParentNode();
+ // Workaround to prevent us from visiting the attributes again
+ while ( $reader->moveToNextAttribute() ) {
+ /* skip */
+ }
+ break;
+ case XMLReader::ELEMENT:
+ $qname = $reader->prefix ?? '';
+ if ( $qname !== '' ) {
+ $qname .= ':';
+ }
+ $qname .= $reader->localName;
+ // This will be the node we'll attach attributes to!
+ $attrNode = $doc->createElementNS( $reader->namespaceURI, $qname );
+ $node->appendChild( $attrNode );
+ // We don't get an END_ELEMENT from the reader if this is
+ // an empty element (sigh)
+ if ( !$reader->isEmptyElement ) {
+ $node = $attrNode;
+ }
+ break;
+ case XMLReader::ATTRIBUTE:
+ $qname = $reader->prefix ?? '';
+ if ( $qname !== '' ) {
+ $qname .= ':';
+ }
+ $qname .= $reader->localName;
+ '@phan-var Element $attrNode';
+ $attrNode->setAttributeNS(
+ $reader->namespaceURI, $qname, $reader->value
+ );
+ break;
+ case XMLReader::TEXT:
+ $nn = $doc->createTextNode( $reader->value );
+ $node->appendChild( $nn );
+ break;
+ case XMLReader::WHITESPACE:
+ case XMLReader::SIGNIFICANT_WHITESPACE:
+ // The reader reports whitespace-only runs with their own
+ // node types, but in the DOM they are ordinary Text nodes.
+ // A Document cannot have Text children, so anything
+ // outside the root element is dropped.
+ if ( $node !== $doc ) {
+ $nn = $doc->createTextNode( $reader->value );
+ $node->appendChild( $nn );
+ }
+ break;
+ case XMLReader::CDATA:
+ $nn = $doc->createCDATASection( $reader->value );
+ $node->appendChild( $nn );
+ break;
+ case XMLReader::COMMENT:
+ $nn = $doc->createComment( $reader->value );
+ $node->appendChild( $nn );
+ break;
+ case XMLReader::PI:
+ // For a processing instruction the reader's name is the
+ // target and its value is the data.
+ $nn = $doc->createProcessingInstruction(
+ $reader->name, $reader->value
+ );
+ $node->appendChild( $nn );
+ break;
+ case XMLReader::DOC_TYPE:
+ # This is a hack: the PHP XMLReader interface provides no
+ # way to extract the contents of a DOC_TYPE node! So we're
+ # going to give it to the HTML tokenizer to interpret.
+ $tokenHandler = new class extends NullTokenHandler {
+ /** @var string */
+ public $name;
+ /** @var string */
+ public $publicId;
+ /** @var string */
+ public $systemId;
+
+ /** @inheritDoc */
+ public function doctype(
+ $name, $publicId, $systemId,
+ $quirks, $sourceStart, $sourceLength
+ ) {
+ $this->name = $name;
+ $this->publicId = $publicId;
+ $this->systemId = $systemId;
+ }
+ };
+ ( new Tokenizer(
+ $tokenHandler, $reader->readOuterXml(), []
+ ) )->execute( [] );
+ $nn = $doc->getImplementation()->createDocumentType(
+ $tokenHandler->name,
+ $tokenHandler->publicId,
+ $tokenHandler->systemId
+ );
+ $node->appendChild( $nn );
+ break;
+ default:
+ throw new Exception( "Unknown node type: " . $reader->nodeType );
+ }
+ }
+ } finally {
+ // Release the reader's input even when a node handler throws.
+ $reader->close();
+ }
+ return $doc;
+ }
}
diff --git a/src/Document.php b/src/Document.php
index 4f8a274..fddff99 100644
--- a/src/Document.php
+++ b/src/Document.php
@@ -233,11 +233,13 @@ private function _updateDoctypeAndDocumentElement(): void {
/**
* @param ?Document $originDoc
* @param string $type
+ * @param string $contentType
* @param ?string $url
*/
public function __construct(
?Document $originDoc = null,
string $type = "xml",
+ string $contentType = 'text/xml',
?string $url = null
) {
parent::__construct( $this );
@@ -249,6 +251,9 @@ public function __construct(
if ( $type === 'html' ) {
$this->_contentType = 'text/html';
$this->_typeIsHtml = true;
+ } else {
+ $this->_contentType = $contentType;
+ $this->_typeIsHtml = false;
}
/* DOM-LS: used by the documentURI and URL method */
@@ -280,6 +285,7 @@ public function _getTemplateDoc() {
$newDoc = new Document(
$this,
$this->_typeIsHtml ? 'html' : 'xml',
+ $this->_contentType,
$this->_URL
);
$this->_templateDocCache = $newDoc->_templateDocCache = $newDoc;
@@ -465,6 +471,17 @@ public function createTextNode( string $data ) : Text {
return new Text( $this, $data );
}
+ /** @inheritDoc */
+ public function createCDATASection( string $data ) : CDATASection {
+ // First check: the owning document must not be an HTML document.
+ $ownerIsHtml = $this->_isHTMLDocument();
+ if ( $ownerIsHtml ) {
+ Util::error( 'NotSupportedError' );
+ }
+ // Second check: the data must not contain the "]]>" sequence.
+ $terminatorAt = strpos( $data, ']]>' );
+ if ( $terminatorAt !== false ) {
+ Util::error( 'InvalidCharacterError' );
+ }
+ $section = new CDATASection( $this, $data );
+ return $section;
+ }
+
/** @inheritDoc */
public function createComment( string $data ) : Comment {
return new Comment( $this, $data );
@@ -836,10 +853,10 @@ protected function _subclassCloneNodeShallow(): Node {
$shallow = new Document(
$this,
$this->_typeIsHtml ? 'html' : 'xml',
+ $this->_contentType,
$this->_URL
);
$shallow->_mode = $this->_mode;
- $shallow->_contentType = $this->_contentType;
return $shallow;
}
@@ -870,6 +887,10 @@ public function _xmlSerialize(
throw new BadXMLException();
}
}
+ // Emitting the XML declaration is not yet in the spec:
+ // https://github.com/w3c/DOM-Parsing/issues/50
+ $markup[] = '<?xml version="1.0" encoding="UTF-8"?>';
+
for ( $child = $this->getFirstChild(); $child !== null; $child = $child->getNextSibling() ) {
$child->_xmlSerialize(
$namespace, $prefixMap, $prefixIndex, $requireWellFormed,
diff --git a/src/DocumentType.php b/src/DocumentType.php
index df44a1d..23db719 100644
--- a/src/DocumentType.php
+++ b/src/DocumentType.php
@@ -141,6 +141,7 @@ public function _xmlSerialize(
if ( $this->_publicId === '' ) {
$markup[] = " SYSTEM";
}
+ // https://github.com/w3c/DOM-Parsing/issues/71
$quote = strpos( $this->_systemId, '"' ) === false ? '"' : "'";
$markup[] = ' ' . $quote . $this->_systemId . $quote;
}
diff --git a/src/Internal/NamespacePrefixMap.php b/src/Internal/NamespacePrefixMap.php
index 1e29826..b485d92 100644
--- a/src/Internal/NamespacePrefixMap.php
+++ b/src/Internal/NamespacePrefixMap.php
@@ -108,9 +108,6 @@ public function retrievePreferredPrefix(
?string $namespace,
?string $preferredPrefix
) : ?string {
- if ( $preferredPrefix === null ) {
- return null;
- }
$last = null;
$candidatesList = $this->map[self::makeKey( $namespace )] ?? [];
foreach ( $candidatesList as $prefix ) {
@@ -149,5 +146,4 @@ public function generatePrefix(
$this->add( $newNamespace, $generatedPrefix );
return $generatedPrefix;
}
-
}
diff --git a/src/Internal/WhatWG.php b/src/Internal/WhatWG.php
index 299fe00..017f635 100644
--- a/src/Internal/WhatWG.php
+++ b/src/Internal/WhatWG.php
@@ -482,6 +482,7 @@ public static function ensure_insert_valid( Node $node, Node $parent, ?Node $chi
case Node::DOCUMENT_TYPE_NODE:
case Node::ELEMENT_NODE:
case Node::TEXT_NODE:
+ case Node::CDATA_SECTION_NODE: // also a Text node
case Node::PROCESSING_INSTRUCTION_NODE:
case Node::COMMENT_NODE:
break;
@@ -492,10 +493,11 @@ public static function ensure_insert_valid( Node $node, Node $parent, ?Node $chi
/*
* DOM-LS #5. If either:
* -node is a Text and parent is a Document
+ * (CDATA counts as a Text node)
* -node is a DocumentType and parent is not a Document
* throw a HierarchyRequestError
*/
- if ( ( $node->getNodeType() === Node::TEXT_NODE && $parent->getNodeType() === Node::DOCUMENT_NODE )
+ if ( ( ( $node->getNodeType() === Node::TEXT_NODE || $node->getNodeType() === Node::CDATA_SECTION_NODE ) && $parent->getNodeType() === Node::DOCUMENT_NODE )
|| ( $node->getNodeType() === Node::DOCUMENT_TYPE_NODE && $parent->getNodeType() !== Node::DOCUMENT_NODE ) ) {
Util::error( "HierarchyRequestError" );
}
@@ -519,7 +521,7 @@ public static function ensure_insert_valid( Node $node, Node $parent, ?Node $chi
$count_element = 0;
for ( $n = $node->getFirstChild(); $n !== null; $n = $n->getNextSibling() ) {
- if ( $n->getNodeType() === Node::TEXT_NODE ) {
+ if ( $n->getNodeType() === Node::TEXT_NODE || $n->getNodeType() === Node::CDATA_SECTION_NODE ) {
$count_text++;
}
if ( $n->getNodeType() === Node::ELEMENT_NODE ) {
@@ -668,6 +670,7 @@ public static function ensure_replace_valid( Node $node, Node $parent, Node $chi
case Node::DOCUMENT_TYPE_NODE:
case Node::ELEMENT_NODE:
case Node::TEXT_NODE:
+ case Node::CDATA_SECTION_NODE: // this is also a Text node
case Node::PROCESSING_INSTRUCTION_NODE:
case Node::COMMENT_NODE:
break;
@@ -678,10 +681,11 @@ public static function ensure_replace_valid( Node $node, Node $parent, Node $chi
/*
* DOM-LS #5. If either:
* -node is a Text and parent is a Document
+ * (CDATA counts as a Text node)
* -node is a DocumentType and parent is not a Document
* throw a HierarchyRequestError
*/
- if ( ( $node->getNodeType() === Node::TEXT_NODE && $parent->getNodeType() === Node::DOCUMENT_NODE )
+ if ( ( ( $node->getNodeType() === Node::TEXT_NODE || $node->getNodeType() === Node::CDATA_SECTION_NODE ) && $parent->getNodeType() === Node::DOCUMENT_NODE )
|| ( $node->getNodeType() === Node::DOCUMENT_TYPE_NODE && $parent->getNodeType() !== Node::DOCUMENT_NODE ) ) {
Util::error( "HierarchyRequestError" );
}
@@ -705,7 +709,7 @@ public static function ensure_replace_valid( Node $node, Node $parent, Node $chi
$count_element = 0;
for ( $n = $node->getFirstChild(); $n !== null; $n = $n->getNextSibling() ) {
- if ( $n->getNodeType() === Node::TEXT_NODE ) {
+ if ( $n->getNodeType() === Node::TEXT_NODE || $n->getNodeType() === Node::CDATA_SECTION_NODE ) {
$count_text++;
}
if ( $n->getNodeType() === Node::ELEMENT_NODE ) {
@@ -968,7 +972,12 @@ public static function xmlSerializeElement(
$markup[] = $qualifiedName;
} else {
$prefix = $el->getPrefix();
- $candidatePrefix = $map->retrievePreferredPrefix( $ns, $prefix );
+ if ( $prefix === null && $ns === $localDefaultNamespace ) {
+ // https://github.com/w3c/DOM-Parsing/issues/52
+ $candidatePrefix = null;
+ } else {
+ $candidatePrefix = $map->retrievePreferredPrefix( $ns, $prefix );
+ }
if ( $prefix === 'xmlns' ) {
if ( $requireWellFormed ) {
throw new BadXMLException();
@@ -1007,7 +1016,8 @@ public static function xmlSerializeElement(
}
} elseif (
$localDefaultNamespace === null ||
- $localDefaultNamespace !== $ns
+ // https://github.com/w3c/DOM-Parsing/issues/47
+ $localDefaultNamespace !== ( $ns ?? '' )
) {
// The namespace still needs to be serialized, but there's
// no prefix or candidate prefix available. Use the default
@@ -1114,7 +1124,11 @@ public static function xmlSerializeAttributes(
continue;
}
if ( $attr->getPrefix() === null ) {
- if ( $ignoreNamespaceDefinitionAttribute ) {
+ if (
+ $ignoreNamespaceDefinitionAttribute &&
+ // https://github.com/w3c/DOM-Parsing/issues/47
+ ( $attr->getValue() ?? '' ) !== ( $attr->getOwnerElement()->getNamespaceURI() ?? '' )
+ ) {
continue;
}
} elseif (
@@ -1131,14 +1145,25 @@ public static function xmlSerializeAttributes(
if ( $attr->getValue() === Util::NAMESPACE_XMLNS ) {
throw new BadXMLException();
}
- if ( $attr->getValue() === '' ) {
+ if (
+ // https://github.com/w3c/DOM-Parsing/issues/48
+ $attr->getPrefix() !== null &&
+ $attr->getValue() === ''
+ ) {
throw new BadXMLException();
}
}
if ( $attr->getPrefix() === 'xmlns' ) {
$candidatePrefix = 'xmlns';
}
- } else {
+ } elseif ( $candidatePrefix === null ) {
+ // The above condition is not (yet) in the spec.
+ // Firefox also tries to preserve the attributes
+ // existing prefix (if any) in this case, which isn't
+ // (yet?) reflected in the spec or the test case.
+ // See discussion at
+ // https://github.com/w3c/DOM-Parsing/issues/29
+
// attribute namespace is not the XMLNS namespace
$candidatePrefix = $map->generatePrefix(
$attrNs, $prefixIndex
@@ -1197,6 +1222,11 @@ public static function xmlSerializeAttrValue(
'"' => '&quot;',
'<' => '&lt;',
'>' => '&gt;',
+ // These aren't in the spec, but should be:
+ // https://github.com/w3c/DOM-Parsing/issues/59
+ "\t" => '&#x9;',
+ "\n" => '&#xA;',
+ "\r" => '&#xD;',
]
);
}
diff --git a/src/NamedNodeMap.php b/src/NamedNodeMap.php
index 14d3538..f6fa676 100644
--- a/src/NamedNodeMap.php
+++ b/src/NamedNodeMap.php
@@ -287,7 +287,7 @@ public function setNamedItemNS( $attr ) {
$oldAttr = $this->getNamedItemNS( $attr->getNamespaceURI(), $attr->getLocalName() );
- if ( $oldAttr == $attr ) {
+ if ( $oldAttr === $attr ) {
return $attr;
}
diff --git a/src/Window.php b/src/Window.php
index 61b60ed..a5f4941 100644
--- a/src/Window.php
+++ b/src/Window.php
@@ -49,7 +49,7 @@ class Window extends EventTarget {
*/
public function __construct( Document $doc = null ) {
if ( $doc == null ) {
- $doc = new Document( null, 'html' );
+ $doc = new Document( null, 'html', 'text/html' );
}
$this->document = $doc;
diff --git a/src/XMLDocument.php b/src/XMLDocument.php
index 9fdef27..8ead849 100644
--- a/src/XMLDocument.php
+++ b/src/XMLDocument.php
@@ -13,7 +13,6 @@ class XMLDocument extends Document implements \Wikimedia\IDLeDOM\XMLDocument {
* @param string $contentType
*/
public function __construct( ?Document $originDoc, string $contentType ) {
- parent::__construct( $originDoc, 'xml', null );
- $this->_contentType = $contentType;
+ // The content type is passed to the parent constructor as its third
+ // argument instead of being assigned to _contentType afterwards.
+ parent::__construct( $originDoc, 'xml', $contentType, null );
}
}
diff --git a/tests/DodoTest.php b/tests/DodoTest.php
index 875e6fa..432879f 100644
--- a/tests/DodoTest.php
+++ b/tests/DodoTest.php
@@ -26,7 +26,7 @@ class DodoTest extends \PHPUnit\Framework\TestCase {
*/
public function testDodo() {
/* Instantiate the nodes */
- $doc = new Document( null, 'html' );
+ $doc = new Document( null, 'html', 'text/html' );
$all_elements = $doc->getElementsByTagName( '*' );
$this->assertSame( 0, $all_elements->length );
diff --git a/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php b/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php
index f169b88..349fefe 100644
--- a/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php
+++ b/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php
@@ -1,6 +1,7 @@
wptAssertTrue($doc instanceof Document, 'Should be Document');
- $this->wptAssertEquals($doc->URL, $this->doc::URL, 'URL');
- $this->wptAssertEquals($doc->documentURI, $this->doc::URL, 'documentURI');
- $this->wptAssertEquals($doc->baseURI, $this->doc::URL, 'baseURI');
+ $this->wptAssertEquals($doc->URL, $this->doc->URL, 'URL');
+ $this->wptAssertEquals($doc->documentURI, $this->doc->URL, 'documentURI');
+ $this->wptAssertEquals($doc->baseURI, $this->doc->URL, 'baseURI');
$this->wptAssertEquals($doc->characterSet, 'UTF-8', 'characterSet');
$this->wptAssertEquals($doc->charset, 'UTF-8', 'charset');
$this->wptAssertEquals($doc->inputEncoding, 'UTF-8', 'inputEncoding');
@@ -29,7 +30,7 @@ public function testDOMParserParseFromStringXml()
$p = new DOMParser();
$doc = $p->parseFromString('', $type);
$this->wptAssertTrue($doc instanceof Document, 'Should be Document');
- checkMetadata($doc, $type);
+ $this->checkMetadata($doc, $type);
$this->wptAssertEquals($doc->documentElement->namespaceURI, null);
$this->wptAssertEquals($doc->documentElement->localName, 'foo');
$this->wptAssertEquals($doc->documentElement->tagName, 'foo');
@@ -42,7 +43,7 @@ public function testDOMParserParseFromStringXml()
$this->assertTest(function () use(&$type) {
$p = new DOMParser();
$doc = $p->parseFromString('', $type);
- checkMetadata($doc, $type);
+ $this->checkMetadata($doc, $type);
$this->wptAssertEquals($doc->documentElement->namespaceURI, 'http://www.mozilla.org/newlayout/xml/parsererror.xml');
$this->wptAssertEquals($doc->documentElement->localName, 'parsererror');
$this->wptAssertEquals($doc->documentElement->tagName, 'parsererror');
diff --git a/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php b/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php
index abc5bda..b75ebd8 100644
--- a/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php
+++ b/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php
@@ -5,6 +5,7 @@
use Wikimedia\Dodo\Element;
use Wikimedia\Dodo\Attr;
use Wikimedia\Dodo\DOMParser;
+use Wikimedia\Dodo\XMLSerializer;
use Wikimedia\Dodo\Tests\Harness\WPTTestHarness;
// @see vendor/web-platform-tests/wpt/domparsing/XMLSerializer-serializeToString.html.
class XMLSerializerSerializeToStringTest extends WPTTestHarness
@@ -28,33 +29,33 @@ public function testXMLSerializerSerializeToString()
$this->doc = $this->loadHtmlFile('vendor/web-platform-tests/wpt/domparsing/XMLSerializer-serializeToString.html');
$XMLNS_URI = 'http://www.w3.org/2000/xmlns/';
$this->assertTest(function () {
- $root = createXmlDoc()->documentElement;
- $this->wptAssertEquals(serialize($root), 'value1');
+ $root = $this->createXmlDoc()->documentElement;
+ $this->wptAssertEquals($this->serialize($root), 'value1');
}, 'check XMLSerializer.serializeToString method could parsing xmldoc to string');
$this->assertTest(function () {
- $root = parse('');
- $this->wptAssertEquals(serialize($root->ownerDocument), '');
+ $root = $this->parse('');
+ $this->wptAssertEquals($this->serialize($root->ownerDocument), '
');
}, 'check XMLSerializer.serializeToString method could parsing document to string');
$this->assertTest(function () {
- $root = createXmlDoc()->documentElement;
+ $root = $this->createXmlDoc()->documentElement;
$element = $root->ownerDocument->createElementNS('urn:foo', 'another');
$child1 = $root->firstChild;
$root->replaceChild($element, $child1);
$element->appendChild($child1);
- $this->wptAssertEquals(serialize($root), 'value1');
+ $this->wptAssertEquals($this->serialize($root), 'value1');
}, 'Check if the default namespace is correctly reset.');
$this->assertTest(function () {
- $root = parse('value1');
- $this->wptAssertEquals(serialize($root), 'value1');
+ $root = $this->parse('value1');
+ $this->wptAssertEquals($this->serialize($root), 'value1');
}, 'Check if there is no redundant empty namespace declaration.');
// https://github.com/w3c/DOM-Parsing/issues/47
$this->assertTest(function () {
- $this->wptAssertEquals(serialize(parse('')), '');
- $this->wptAssertEquals(serialize(parse('')), '');
- $this->wptAssertEquals(serialize(parse('')), '');
+ $this->wptAssertEquals($this->serialize($this->parse('')), '');
+ $this->wptAssertEquals($this->serialize($this->parse('')), '');
+ $this->wptAssertEquals($this->serialize($this->parse('')), '');
}, 'Check if redundant xmlns="..." is dropped.');
$this->assertTest(function () use(&$XMLNS_URI) {
- $root = parse('');
+ $root = $this->parse('');
$child = $root->ownerDocument->createElement('child');
$child->setAttributeNS($XMLNS_URI, 'xmlns', 'FAIL1');
$root->appendChild($child);
@@ -70,121 +71,121 @@ public function testXMLSerializerSerializeToString()
$child5 = $root->ownerDocument->createElement('child5');
$child5->setAttributeNS($XMLNS_URI, 'xmlns', '');
$root->appendChild($child5);
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if inconsistent xmlns="..." is dropped.');
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->setAttributeNS('uri', 'name', 'v');
- $this->wptAssertEquals(serialize($root), '');
- $root2 = parse('');
+ $this->wptAssertEquals($this->serialize($root), '');
+ $root2 = $this->parse('');
$child = $root2->firstChild;
$child->setAttributeNS('uri', 'name', 'v');
- $this->wptAssertEquals(serialize($root2), '');
- $root3 = parse('');
+ $this->wptAssertEquals($this->serialize($root2), '');
+ $root3 = $this->parse('');
$child3 = $root3->firstChild;
$child3->setAttributeNS('uri', 'name', 'v');
- $this->wptAssertEquals(serialize($root3), '', 'Should choose the nearest prefix');
+ $this->wptAssertEquals($this->serialize($root3), '', 'Should choose the nearest prefix');
}, 'Check if an attribute with namespace and no prefix is serialized with the nearest-declared prefix');
// https://github.com/w3c/DOM-Parsing/issues/45
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->firstChild->setAttributeNS('u1', 'name', 'v');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if an attribute with namespace and no prefix is serialized with the nearest-declared prefix even if the prefix is assigned to another namespace.');
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->setAttributeNS('uri', 'p:name', 'v');
- $this->wptAssertEquals(serialize($root), '');
- $root2 = parse('');
+ $this->wptAssertEquals($this->serialize($root), '');
+ $root2 = $this->parse('');
$child = $root2->firstChild;
$child->setAttributeNS('uri', 'p:name', 'value');
- $this->wptAssertEquals(serialize($root2), '');
+ $this->wptAssertEquals($this->serialize($root2), '');
}, 'Check if the prefix of an attribute is replaced with another existing prefix mapped to the same namespace URI.');
// https://github.com/w3c/DOM-Parsing/issues/29
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->setAttributeNS('uri2', 'p:name', 'value');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if the prefix of an attribute is NOT preserved in a case where neither its prefix nor its namespace URI is not already used.');
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->setAttributeNS('uri2', 'xx:name', 'value');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if the prefix of an attribute is replaced with a generated one in a case where the prefix is already mapped to a different namespace URI.');
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->setAttribute('attr', "\t");
- $this->wptAssertInArray(serialize($root), ['', '']);
+ $this->wptAssertInArray($this->serialize($root), ['', '']);
$root->setAttribute('attr', "\n");
- $this->wptAssertInArray(serialize($root), ['', '']);
+ $this->wptAssertInArray($this->serialize($root), ['', '']);
$root->setAttribute('attr', "\r");
- $this->wptAssertInArray(serialize($root), ['', '']);
+ $this->wptAssertInArray($this->serialize($root), ['', '']);
}, 'check XMLSerializer.serializeToString escapes attribute values for roundtripping');
$this->assertTest(function () use(&$XMLNS_URI) {
$root = (new Document())->createElement('root');
$root->setAttributeNS('uri1', 'p:foobar', 'value1');
$root->setAttributeNS($XMLNS_URI, 'xmlns:p', 'uri2');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if attribute serialization takes into account of following xmlns:* attributes');
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->firstChild->setAttributeNS('uri2', 'p:foobar', 'v');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if attribute serialization takes into account of the same prefix declared in an ancestor element');
$this->assertTest(function () {
- $this->wptAssertEquals(serialize(parse('')), '');
- $this->wptAssertEquals(serialize(parse('')), '');
+ $this->wptAssertEquals($this->serialize($this->parse('')), '');
+ $this->wptAssertEquals($this->serialize($this->parse('')), '');
}, 'Check if start tag serialization drops element prefix if the namespace is same as inherited default namespace.');
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$child2 = $root->ownerDocument->createElementNS('u1', 'child2');
$root->firstChild->appendChild($child2);
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if start tag serialization finds an appropriate prefix.');
$this->assertTest(function () use(&$XMLNS_URI) {
$root = (new Document())->createElementNS('uri1', 'p:root');
$root->setAttributeNS($XMLNS_URI, 'xmlns:p', 'uri2');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if start tag serialization takes into account of its xmlns:* attributes');
$this->assertTest(function () use(&$XMLNS_URI) {
$root = (new Document())->createElement('root');
$root->setAttributeNS($XMLNS_URI, 'xmlns:p', 'uri2');
$child = $root->ownerDocument->createElementNS('uri1', 'p:child');
$root->appendChild($child);
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if start tag serialization applied the original prefix even if it is declared in an ancestor element.');
// https://github.com/w3c/DOM-Parsing/issues/52
$this->assertTest(function () {
- $this->wptAssertEquals(serialize(parse('')), '');
+ $this->wptAssertEquals($this->serialize($this->parse('')), '');
}, 'Check if start tag serialization does NOT apply the default namespace if its namespace is declared in an ancestor.');
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->firstChild->setAttributeNS('uri1', 'attr1', 'value1');
$root->firstChild->setAttributeNS('uri2', 'attr2', 'value2');
$root->lastChild->setAttributeNS('uri3', 'attr3', 'value3');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if generated prefixes match to "ns${index}".');
// https://github.com/w3c/DOM-Parsing/issues/44
// According to 'DOM Parsing and Serialization' draft as of 2018-12-11,
// 'generate a prefix' result can conflict with an existing xmlns:ns* declaration.
$this->assertTest(function () {
- $root = parse('');
+ $root = $this->parse('');
$root->firstChild->setAttributeNS('uri3', 'attr1', 'value1');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if "ns1" is generated even if the element already has xmlns:ns1.');
$this->assertTest(function () {
$root = (new Document())->createElement('root');
$root->setAttributeNS('http://www.w3.org/1999/xlink', 'href', 'v');
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
$root2 = (new Document())->createElement('root');
$root2->setAttributeNS('http://www.w3.org/1999/xlink', 'xl:type', 'v');
- $this->wptAssertEquals(serialize($root2), '');
+ $this->wptAssertEquals($this->serialize($root2), '');
}, 'Check if no special handling for XLink namespace unlike HTML serializer.');
$this->assertTest(function () {
$root = new DocumentFragment($this->doc);
$root->append($this->doc->createElement('div'));
$root->append($this->doc->createElement('span'));
- $this->wptAssertEquals(serialize($root), '');
+ $this->wptAssertEquals($this->serialize($root), '');
}, 'Check if document fragment serializes.');
}
}
diff --git a/tests/logs/errors.yaml b/tests/logs/errors.yaml
index efef835..f251c97 100644
--- a/tests/logs/errors.yaml
+++ b/tests/logs/errors.yaml
@@ -288,11 +288,6 @@
_comment: ''
testcases: testHcNodereplacechildnewchildexists
files: /tests/W3C/Level1/Core/HcNodereplacechildnewchildexistsTest.php
-'Error: Call to undefined function Wikimedia\Dodo\Tests\WPT\Domparsing\createXmlDoc()':
- _total: 1
- _comment: ''
- testcases: testXMLSerializerSerializeToString
- files: /tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php
'Error: Call to undefined function Wikimedia\Dodo\Tests\WPT\Domparsing\testIsChild()':
_total: 1
_comment: ''
@@ -459,6 +454,11 @@
_comment: ''
testcases: testDocumentConstructor
files: /tests/WPT/Dom/Nodes/DocumentConstructorTest.php
+'Undefined property via __get(): readyState in /src/Document.php on line 69':
+ _total: 1
+ _comment: ''
+ testcases: testXmldomparser
+ files: /tests/WPT/Domparsing/XmldomparserTest.php
'Undefined property via __get(): role in /tests/WPT/Dom/Nodes/AriaAttributeReflectionTentativeTest.php on line 15':
_total: 1
_comment: ''
@@ -642,22 +642,6 @@
_comment: ''
testcases: testDocumentCreateTextNode
files: /tests/WPT/Dom/Nodes/DocumentCreateTextNodeTest.php
-'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\DOMParser::parseFromString( ''application/xhtml+xml'' )':
- _total: 1
- _comment: ''
- testcases: testDOMParserParseFromStringXmlDoctype
- files: /tests/WPT/Domparsing/DOMParserParseFromStringXmlDoctypeTest.php
-'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\DOMParser::parseFromString( ''text/xml'' )':
- _total: 3
- _comment: ''
- testcases: |-
- testDOMParserParseFromStringXml
- testNodeNormalize
- testXmldomparser
- files: |-
- /tests/WPT/Dom/Nodes/NodeNormalizeTest.php
- /tests/WPT/Domparsing/DOMParserParseFromStringXmlTest.php
- /tests/WPT/Domparsing/XmldomparserTest.php
'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Document::_createElementNS':
_total: 7
_comment: ''
@@ -682,11 +666,6 @@
_comment: ''
testcases: testAppendOnDocument
files: /tests/WPT/Dom/Nodes/AppendOnDocumentTest.php
-'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Document::createCDATASection':
- _total: 1
- _comment: ''
- testcases: testDocumentCreateCDATASection
- files: /tests/WPT/Dom/Nodes/DocumentCreateCDATASectionTest.php
'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Document::createTreeWalker':
_total: 7
_comment: ''
@@ -1129,6 +1108,11 @@
/tests/WPT/Dom/Collections/HTMLCollectionEmptyNameTest.php
/tests/WPT/Dom/Collections/HTMLCollectionIteratorTest.php
/tests/WPT/Dom/Collections/HTMLCollectionOwnPropsTest.php
+'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Node::getBaseURI':
+ _total: 1
+ _comment: ''
+ testcases: testDOMParserParseFromStringXml
+ files: /tests/WPT/Domparsing/DOMParserParseFromStringXmlTest.php
'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Text::getWholeText':
_total: 1
_comment: ''
diff --git a/tests/logs/failures.yml b/tests/logs/failures.yml
index 16e6aed..9823abd 100644
--- a/tests/logs/failures.yml
+++ b/tests/logs/failures.yml
@@ -27,10 +27,16 @@
testcases: Wikimedia\Dodo\Tests\WPT\Dom\Nodes\DocumentFragmentGetElementByIdTest
files: /tests/WPT/Dom/Nodes/DocumentFragmentGetElementByIdTest.php
'Failed asserting that two strings are equal.':
- _total: 1
+ _total: 3
_comment: ''
- testcases: Wikimedia\Dodo\Tests\WPT\Domparsing\DOMParserParseFromStringHtmlTest
- files: /tests/WPT/Domparsing/DOMParserParseFromStringHtmlTest.php
+ testcases: |-
+ Wikimedia\Dodo\Tests\DodoTest
+ Wikimedia\Dodo\Tests\WPT\Domparsing\DOMParserParseFromStringHtmlTest
+ Wikimedia\Dodo\Tests\WPT\Domparsing\XMLSerializerSerializeToStringTest
+ files: |-
+ /tests/DodoTest.php
+ /tests/WPT/Domparsing/DOMParserParseFromStringHtmlTest.php
+ /tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php
'HTMLCollection-Failed asserting that false is true.':
_total: 1
_comment: ''
diff --git a/tools/TestsGenerator/ParserTask.php b/tools/TestsGenerator/ParserTask.php
index c0a9e0c..bc816bb 100644
--- a/tools/TestsGenerator/ParserTask.php
+++ b/tools/TestsGenerator/ParserTask.php
@@ -433,7 +433,9 @@ protected function removeW3CDisparity() : void {
* Removes disparity after js2php.
*/
protected function preProcessWPTTest() : void {
- $find_replace = [ '$document' => '$this->doc',
+ $find_replace = [
+ '$document::URL' => '$this->doc->URL',
+ '$document' => '$this->doc',
'= create(' => '= $this->create(',
'$TypeError' => '$this->type_error',
'Object::keys( $testExtensions )->' => '$testExtensions->',
@@ -722,6 +724,10 @@ public function leaveNode( $node ) {
'myCloneContents',
'nestRangeInOuterContainer',
'restoreIframe',
+ 'createXmlDoc',
+ 'parse',
+ 'serialize',
+ 'checkMetadata',
// These are helper functions defined in classes
// which unfortunately begin with 'test' and so
// PHPUnit will think that they are standalone
@@ -872,6 +878,7 @@ private function getUseStmts( $extraUses ) : array {
'HTMLElement' => 'Wikimedia\Dodo\HTMLElement',
'NodeFilter' => 'Wikimedia\Dodo\NodeFilter',
'new Document' => 'Wikimedia\Dodo\Document',
+ 'instanceof Document' => 'Wikimedia\Dodo\Document',
'XMLDocument' => 'Wikimedia\IDLeDOM\XMLDocument',
'Element' => 'Wikimedia\Dodo\Element',
'Attr' => 'Wikimedia\Dodo\Attr',
@@ -897,18 +904,23 @@ private function getUseStmts( $extraUses ) : array {
'DOMException' => 'Wikimedia\Dodo\DOMException',
'DOMImplementation' => 'Wikimedia\Dodo\DOMImplementation',
'DOMParser' => 'Wikimedia\Dodo\DOMParser',
+ 'XMLSerializer' => 'Wikimedia\Dodo\XMLSerializer',
'Range' => 'Wikimedia\Dodo\Range',
'AbstractRange' => 'Wikimedia\Dodo\AbstractRange',
'StaticRange' => 'Wikimedia\Dodo\StaticRange',
'Common' => 'Wikimedia\Dodo\Tests\Harness\Utils\Common',
];
+ $notAdded = [];
foreach ( $list_ns as $use => $namespace ) {
if (
strpos( $this->test, $use ) !== false ||
( $extraUses[$use] ?? false ) !== false
) {
- $stmts[] = $this->factory->use( $namespace );
+ if ( $notAdded[$namespace] ?? true ) {
+ $stmts[] = $this->factory->use( $namespace );
+ $notAdded[$namespace] = false;
+ }
}
}