From 4852c2a5f5c2c96723699701eb1ef19b54cf96eb Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Fri, 2 Jul 2021 01:15:01 -0400 Subject: [PATCH] Implement XML parser; fixes to XMLSerializer and CDATASection support Bug: w3c/DOM-Parsing#29 Bug: w3c/DOM-Parsing#38 Bug: w3c/DOM-Parsing#47 Bug: w3c/DOM-Parsing#48 Bug: w3c/DOM-Parsing#50 Bug: w3c/DOM-Parsing#52 Bug: w3c/DOM-Parsing#59 Bug: w3c/DOM-Parsing#71 Change-Id: I76735c4be1d9738c690417207301f737e3a3c9ff --- src/CDATASection.php | 19 +++ src/DOMImplementation.php | 3 +- src/DOMParser.php | 133 +++++++++++++++++- src/Document.php | 23 ++- src/DocumentType.php | 1 + src/Internal/NamespacePrefixMap.php | 4 - src/Internal/WhatWG.php | 48 +++++-- src/NamedNodeMap.php | 2 +- src/Window.php | 2 +- src/XMLDocument.php | 3 +- tests/DodoTest.php | 2 +- ...OMImplementationCreateHTMLDocumentTest.php | 1 + .../WPT/Dom/Nodes/DocumentTypeLiteralTest.php | 1 + .../DOMParserParseFromStringXmlTest.php | 11 +- .../XMLSerializerSerializeToStringTest.php | 101 ++++++------- tests/logs/errors.yaml | 36 ++--- tests/logs/failures.yml | 12 +- tools/TestsGenerator/ParserTask.php | 16 ++- 18 files changed, 307 insertions(+), 111 deletions(-) diff --git a/src/CDATASection.php b/src/CDATASection.php index bcfebbc..b5473e5 100644 --- a/src/CDATASection.php +++ b/src/CDATASection.php @@ -4,6 +4,8 @@ namespace Wikimedia\Dodo; +use Wikimedia\Dodo\Internal\BadXMLException; +use Wikimedia\Dodo\Internal\NamespacePrefixMap; use Wikimedia\Dodo\Internal\UnimplementedTrait; class CDATASection extends Text implements \Wikimedia\IDLeDOM\CDATASection { @@ -27,4 +29,21 @@ final public function getNodeType() : int { public function getNodeName() : string { return "#cdata-section"; } + + /** @inheritDoc */ + public function _xmlSerialize( + ?string $namespace, NamespacePrefixMap $prefixMap, int &$prefixIndex, + bool $requireWellFormed, array &$markup + ) : void { + // See https://github.com/w3c/DOM-Parsing/issues/38 + $data = $this->getData(); + if ( $requireWellFormed ) { + if ( strpos( $data, ']]>' ) !== false ) { + throw new BadXMLException(); + } + } + $markup[] = 'getData(); + $markup[] = ']]>'; + } } diff --git a/src/DOMImplementation.php b/src/DOMImplementation.php index 8d24099..6de8036 100644 --- a/src/DOMImplementation.php +++ b/src/DOMImplementation.php @@ -76,7 +76,6 @@ public function createDocumentType( $qualifiedName, $publicId, $systemId ) { $qualifiedName, $publicId, $systemId ); - /* TEMPORARY STUB */ } /** @@ -147,7 +146,7 @@ public function createDocument( ?string $namespace, ?string $qualifiedName = '', /** @inheritDoc */ public function createHTMLDocument( ?string $titleText = null ) { - $d = new Document( $this->_contextObject, 'html', null ); + $d = new Document( $this->_contextObject, 'html', 'text/html', null ); $d->appendChild( new DocumentType( $d, "html" ) ); diff --git a/src/DOMParser.php b/src/DOMParser.php index dc6c307..4053ff8 100644 --- a/src/DOMParser.php +++ b/src/DOMParser.php @@ -4,12 +4,14 @@ namespace Wikimedia\Dodo; +use Exception; use RemexHtml\DOM\DOMBuilder; +use RemexHtml\Tokenizer\NullTokenHandler; use RemexHtml\Tokenizer\Tokenizer; use RemexHtml\TreeBuilder\Dispatcher; use RemexHtml\TreeBuilder\TreeBuilder; -use Wikimedia\Dodo\Internal\UnimplementedException; use Wikimedia\IDLeDOM\DOMParserSupportedType; +use XMLReader; /** * DOMParser @@ -24,9 +26,23 @@ class DOMParser implements \Wikimedia\IDLeDOM\DOMParser { */ public function parseFromString( string $string, /* DOMParserSupportedType */ string $type ) { $type = DOMParserSupportedType::cast( $type ); - if ( $type !== DOMParserSupportedType::text_html ) { - throw new UnimplementedException( __METHOD__ . "( '$type' )" ); + switch ( $type ) { + case DOMParserSupportedType::text_html: + return $this->_parseHtml( $string ); + default: + // XXX if we throw an XML well-formedness error here, we're + /// supposed to make a document describing it, instead of + // throwing an exception. + return $this->_parseXml( $string, $type ); } + } + + /** + * Create an HTML parser, parsing the string as UTF-8. + * @param string $string + * @return Document + */ + private function _parseHtml( string $string ) { $domBuilder = new class( [ 'suppressHtmlNamespace' => true, 'suppressIdAttribute' => true, @@ -42,7 +58,7 @@ protected function createDocument( string $system = null ) { // Force this to be an HTML document (not an XML document) - $this->doc = new Document( null, 'html' ); + $this->doc = new Document( null, 'html', 'text/html' ); return $this->doc; } @@ -76,4 +92,113 @@ public function doctype( $name, $public, $system, $quirks, $sourceStart, $source return $result; } + /** + * An XML parser ... is a construct that follows the rules given in + * XML to map a string of bytes or characters into a Document + * object. + * + * The spec then follows that up with: + * "Note: At the time of writing, no such rules actually exist." + * + * Use the enabled-by-default PHP XMLReader class to do our + * parsing and cram it into a Document somehow, and hope we don't + * mangle things too badly. + * + * @see https://html.spec.whatwg.org/multipage/xhtml.html#xml-parser + * + * @param string $s The string to parse + * @param string $contentType + * @return Document + */ + private function _parseXML( string $s, string $contentType ) { + $reader = new XMLReader(); + $reader->XML( + $s, 'utf-8', + LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_PARSEHUGE + ); + # According to spec, this is a Document not an XMLDocument + $doc = new Document( null, 'xml', $contentType ); + $node = $doc; + $attrNode = null; + while ( $reader->moveToNextAttribute() || $reader->read() ) { + switch ( $reader->nodeType ) { + case XMLReader::END_ELEMENT: + $node = $node->getParentNode(); + // Workaround to prevent us from visiting the attributes again + while ( $reader->moveToNextAttribute() ) { + /* skip */ + } + break; + case XMLReader::ELEMENT: + $qname = $reader->prefix ?? ''; + if ( $qname !== '' ) { + $qname .= ':'; + } + $qname .= $reader->localName; + // This will be the node we'll attach attributes to! + $attrNode = $doc->createElementNS( $reader->namespaceURI, $qname ); + $node->appendChild( $attrNode ); + // We don't get an END_ELEMENT from the reader if this is + // an empty element (sigh) + if ( !$reader->isEmptyElement ) { + $node = $attrNode; + } + break; + case XMLReader::ATTRIBUTE: + $qname = $reader->prefix ?? ''; + if ( $qname !== '' ) { + $qname .= ':'; + } + $qname .= $reader->localName; + '@phan-var Element $attrNode'; + $attrNode->setAttributeNS( + $reader->namespaceURI, $qname, $reader->value + ); + break; + case XMLReader::TEXT: + $nn = $doc->createTextNode( $reader->value ); + $node->appendChild( $nn ); + break; + case XMLReader::CDATA: + $nn = $doc->createCDATASection( $reader->value ); + $node->appendChild( $nn ); + break; + case XMLReader::DOC_TYPE: + # This is a hack: the PHP XMLReader interface provides no + # way to extract the contents of a DOC_TYPE node! So we're + # going to give it to the HTML tokenizer to interpret. + $tokenHandler = new class extends NullTokenHandler { + /** @var string */ + public $name; + /** @var string */ + public $publicId; + /** @var string */ + public $systemId; + + /** @inheritDoc */ + public function doctype( + $name, $publicId, $systemId, + $quirks, $sourceStart, $sourceLength + ) { + $this->name = $name; + $this->publicId = $publicId; + $this->systemId = $systemId; + } + }; + ( new Tokenizer( + $tokenHandler, $reader->readOuterXml(), [] + ) )->execute( [] ); + $nn = $doc->getImplementation()->createDocumentType( + $tokenHandler->name, + $tokenHandler->publicId, + $tokenHandler->systemId + ); + $node->appendChild( $nn ); + break; + default: + throw new Exception( "Unknown node type: " . $reader->nodeType ); + } + } + return $doc; + } } diff --git a/src/Document.php b/src/Document.php index 4f8a274..fddff99 100644 --- a/src/Document.php +++ b/src/Document.php @@ -233,11 +233,13 @@ private function _updateDoctypeAndDocumentElement(): void { /** * @param ?Document $originDoc * @param string $type + * @param string $contentType * @param ?string $url */ public function __construct( ?Document $originDoc = null, string $type = "xml", + string $contentType = 'text/xml', ?string $url = null ) { parent::__construct( $this ); @@ -249,6 +251,9 @@ public function __construct( if ( $type === 'html' ) { $this->_contentType = 'text/html'; $this->_typeIsHtml = true; + } else { + $this->_contentType = $contentType; + $this->_typeIsHtml = false; } /* DOM-LS: used by the documentURI and URL method */ @@ -280,6 +285,7 @@ public function _getTemplateDoc() { $newDoc = new Document( $this, $this->_typeIsHtml ? 'html' : 'xml', + $this->_contentType, $this->_URL ); $this->_templateDocCache = $newDoc->_templateDocCache = $newDoc; @@ -465,6 +471,17 @@ public function createTextNode( string $data ) : Text { return new Text( $this, $data ); } + /** @inheritDoc */ + public function createCDATASection( string $data ) : CDATASection { + if ( $this->_isHTMLDocument() ) { + Util::error( 'NotSupportedError' ); + } + if ( strpos( $data, ']]>' ) !== false ) { + Util::error( 'InvalidCharacterError' ); + } + return new CDATASection( $this, $data ); + } + /** @inheritDoc */ public function createComment( string $data ) : Comment { return new Comment( $this, $data ); @@ -836,10 +853,10 @@ protected function _subclassCloneNodeShallow(): Node { $shallow = new Document( $this, $this->_typeIsHtml ? 'html' : 'xml', + $this->_contentType, $this->_URL ); $shallow->_mode = $this->_mode; - $shallow->_contentType = $this->_contentType; return $shallow; } @@ -870,6 +887,10 @@ public function _xmlSerialize( throw new BadXMLException(); } } + // Emitting the XML declaration is not yet in the spec: + // https://github.com/w3c/DOM-Parsing/issues/50 + $markup[] = ''; + for ( $child = $this->getFirstChild(); $child !== null; $child = $child->getNextSibling() ) { $child->_xmlSerialize( $namespace, $prefixMap, $prefixIndex, $requireWellFormed, diff --git a/src/DocumentType.php b/src/DocumentType.php index df44a1d..23db719 100644 --- a/src/DocumentType.php +++ b/src/DocumentType.php @@ -141,6 +141,7 @@ public function _xmlSerialize( if ( $this->_publicId === '' ) { $markup[] = " SYSTEM"; } + // https://github.com/w3c/DOM-Parsing/issues/71 $quote = strpos( $this->_systemId, '"' ) === false ? '"' : "'"; $markup[] = ' ' . $quote . $this->_systemId . $quote; } diff --git a/src/Internal/NamespacePrefixMap.php b/src/Internal/NamespacePrefixMap.php index 1e29826..b485d92 100644 --- a/src/Internal/NamespacePrefixMap.php +++ b/src/Internal/NamespacePrefixMap.php @@ -108,9 +108,6 @@ public function retrievePreferredPrefix( ?string $namespace, ?string $preferredPrefix ) : ?string { - if ( $preferredPrefix === null ) { - return null; - } $last = null; $candidatesList = $this->map[self::makeKey( $namespace )] ?? []; foreach ( $candidatesList as $prefix ) { @@ -149,5 +146,4 @@ public function generatePrefix( $this->add( $newNamespace, $generatedPrefix ); return $generatedPrefix; } - } diff --git a/src/Internal/WhatWG.php b/src/Internal/WhatWG.php index 299fe00..017f635 100644 --- a/src/Internal/WhatWG.php +++ b/src/Internal/WhatWG.php @@ -482,6 +482,7 @@ public static function ensure_insert_valid( Node $node, Node $parent, ?Node $chi case Node::DOCUMENT_TYPE_NODE: case Node::ELEMENT_NODE: case Node::TEXT_NODE: + case Node::CDATA_SECTION_NODE: // also a Text node case Node::PROCESSING_INSTRUCTION_NODE: case Node::COMMENT_NODE: break; @@ -492,10 +493,11 @@ public static function ensure_insert_valid( Node $node, Node $parent, ?Node $chi /* * DOM-LS #5. If either: * -node is a Text and parent is a Document + * (CDATA counts as a Text node) * -node is a DocumentType and parent is not a Document * throw a HierarchyRequestError */ - if ( ( $node->getNodeType() === Node::TEXT_NODE && $parent->getNodeType() === Node::DOCUMENT_NODE ) + if ( ( ( $node->getNodeType() === Node::TEXT_NODE || $node->getNodeType() === Node::CDATA_SECTION_NODE ) && $parent->getNodeType() === Node::DOCUMENT_NODE ) || ( $node->getNodeType() === Node::DOCUMENT_TYPE_NODE && $parent->getNodeType() !== Node::DOCUMENT_NODE ) ) { Util::error( "HierarchyRequestError" ); } @@ -519,7 +521,7 @@ public static function ensure_insert_valid( Node $node, Node $parent, ?Node $chi $count_element = 0; for ( $n = $node->getFirstChild(); $n !== null; $n = $n->getNextSibling() ) { - if ( $n->getNodeType() === Node::TEXT_NODE ) { + if ( $n->getNodeType() === Node::TEXT_NODE || $n->getNodeType() === Node::CDATA_SECTION_NODE ) { $count_text++; } if ( $n->getNodeType() === Node::ELEMENT_NODE ) { @@ -668,6 +670,7 @@ public static function ensure_replace_valid( Node $node, Node $parent, Node $chi case Node::DOCUMENT_TYPE_NODE: case Node::ELEMENT_NODE: case Node::TEXT_NODE: + case Node::CDATA_SECTION_NODE: // this is also a Text node case Node::PROCESSING_INSTRUCTION_NODE: case Node::COMMENT_NODE: break; @@ -678,10 +681,11 @@ public static function ensure_replace_valid( Node $node, Node $parent, Node $chi /* * DOM-LS #5. If either: * -node is a Text and parent is a Document + * (CDATA counts as a Text node) * -node is a DocumentType and parent is not a Document * throw a HierarchyRequestError */ - if ( ( $node->getNodeType() === Node::TEXT_NODE && $parent->getNodeType() === Node::DOCUMENT_NODE ) + if ( ( ( $node->getNodeType() === Node::TEXT_NODE || $node->getNodeType() === Node::CDATA_SECTION_NODE ) && $parent->getNodeType() === Node::DOCUMENT_NODE ) || ( $node->getNodeType() === Node::DOCUMENT_TYPE_NODE && $parent->getNodeType() !== Node::DOCUMENT_NODE ) ) { Util::error( "HierarchyRequestError" ); } @@ -705,7 +709,7 @@ public static function ensure_replace_valid( Node $node, Node $parent, Node $chi $count_element = 0; for ( $n = $node->getFirstChild(); $n !== null; $n = $n->getNextSibling() ) { - if ( $n->getNodeType() === Node::TEXT_NODE ) { + if ( $n->getNodeType() === Node::TEXT_NODE || $n->getNodeType() === Node::CDATA_SECTION_NODE ) { $count_text++; } if ( $n->getNodeType() === Node::ELEMENT_NODE ) { @@ -968,7 +972,12 @@ public static function xmlSerializeElement( $markup[] = $qualifiedName; } else { $prefix = $el->getPrefix(); - $candidatePrefix = $map->retrievePreferredPrefix( $ns, $prefix ); + if ( $prefix === null && $ns === $localDefaultNamespace ) { + // https://github.com/w3c/DOM-Parsing/issues/52 + $candidatePrefix = null; + } else { + $candidatePrefix = $map->retrievePreferredPrefix( $ns, $prefix ); + } if ( $prefix === 'xmlns' ) { if ( $requireWellFormed ) { throw new BadXMLException(); @@ -1007,7 +1016,8 @@ public static function xmlSerializeElement( } } elseif ( $localDefaultNamespace === null || - $localDefaultNamespace !== $ns + // https://github.com/w3c/DOM-Parsing/issues/47 + $localDefaultNamespace !== ( $ns ?? '' ) ) { // The namespace still needs to be serialized, but there's // no prefix or candidate prefix available. Use the default @@ -1114,7 +1124,11 @@ public static function xmlSerializeAttributes( continue; } if ( $attr->getPrefix() === null ) { - if ( $ignoreNamespaceDefinitionAttribute ) { + if ( + $ignoreNamespaceDefinitionAttribute && + // https://github.com/w3c/DOM-Parsing/issues/47 + ( $attr->getValue() ?? '' ) !== ( $attr->getOwnerElement()->getNamespaceURI() ?? '' ) + ) { continue; } } elseif ( @@ -1131,14 +1145,25 @@ public static function xmlSerializeAttributes( if ( $attr->getValue() === Util::NAMESPACE_XMLNS ) { throw new BadXMLException(); } - if ( $attr->getValue() === '' ) { + if ( + // https://github.com/w3c/DOM-Parsing/issues/48 + $attr->getPrefix() !== null && + $attr->getValue() === '' + ) { throw new BadXMLException(); } } if ( $attr->getPrefix() === 'xmlns' ) { $candidatePrefix = 'xmlns'; } - } else { + } elseif ( $candidatePrefix === null ) { + // The above condition is not (yet) in the spec. + // Firefox also tries to preserve the attributes + // existing prefix (if any) in this case, which isn't + // (yet?) reflected in the spec or the test case. + // See discussion at + // https://github.com/w3c/DOM-Parsing/issues/29 + // attribute namespace is not the XMLNS namespace $candidatePrefix = $map->generatePrefix( $attrNs, $prefixIndex @@ -1197,6 +1222,11 @@ public static function xmlSerializeAttrValue( '"' => '"', '<' => '<', '>' => '>', + // These aren't in the spec, but should be: + // https://github.com/w3c/DOM-Parsing/issues/59 + "\t" => ' ', + "\n" => ' ', + "\r" => ' ', ] ); } diff --git a/src/NamedNodeMap.php b/src/NamedNodeMap.php index 14d3538..f6fa676 100644 --- a/src/NamedNodeMap.php +++ b/src/NamedNodeMap.php @@ -287,7 +287,7 @@ public function setNamedItemNS( $attr ) { $oldAttr = $this->getNamedItemNS( $attr->getNamespaceURI(), $attr->getLocalName() ); - if ( $oldAttr == $attr ) { + if ( $oldAttr === $attr ) { return $attr; } diff --git a/src/Window.php b/src/Window.php index 61b60ed..a5f4941 100644 --- a/src/Window.php +++ b/src/Window.php @@ -49,7 +49,7 @@ class Window extends EventTarget { */ public function __construct( Document $doc = null ) { if ( $doc == null ) { - $doc = new Document( null, 'html' ); + $doc = new Document( null, 'html', 'text/html' ); } $this->document = $doc; diff --git a/src/XMLDocument.php b/src/XMLDocument.php index 9fdef27..8ead849 100644 --- a/src/XMLDocument.php +++ b/src/XMLDocument.php @@ -13,7 +13,6 @@ class XMLDocument extends Document implements \Wikimedia\IDLeDOM\XMLDocument { * @param string $contentType */ public function __construct( ?Document $originDoc, string $contentType ) { - parent::__construct( $originDoc, 'xml', null ); - $this->_contentType = $contentType; + parent::__construct( $originDoc, 'xml', $contentType, null ); } } diff --git a/tests/DodoTest.php b/tests/DodoTest.php index 875e6fa..432879f 100644 --- a/tests/DodoTest.php +++ b/tests/DodoTest.php @@ -26,7 +26,7 @@ class DodoTest extends \PHPUnit\Framework\TestCase { */ public function testDodo() { /* Instantiate the nodes */ - $doc = new Document( null, 'html' ); + $doc = new Document( null, 'html', 'text/html' ); $all_elements = $doc->getElementsByTagName( '*' ); $this->assertSame( 0, $all_elements->length ); diff --git a/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php b/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php index f169b88..349fefe 100644 --- a/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php +++ b/tests/WPT/Dom/Nodes/DOMImplementationCreateHTMLDocumentTest.php @@ -1,6 +1,7 @@ wptAssertTrue($doc instanceof Document, 'Should be Document'); - $this->wptAssertEquals($doc->URL, $this->doc::URL, 'URL'); - $this->wptAssertEquals($doc->documentURI, $this->doc::URL, 'documentURI'); - $this->wptAssertEquals($doc->baseURI, $this->doc::URL, 'baseURI'); + $this->wptAssertEquals($doc->URL, $this->doc->URL, 'URL'); + $this->wptAssertEquals($doc->documentURI, $this->doc->URL, 'documentURI'); + $this->wptAssertEquals($doc->baseURI, $this->doc->URL, 'baseURI'); $this->wptAssertEquals($doc->characterSet, 'UTF-8', 'characterSet'); $this->wptAssertEquals($doc->charset, 'UTF-8', 'charset'); $this->wptAssertEquals($doc->inputEncoding, 'UTF-8', 'inputEncoding'); @@ -29,7 +30,7 @@ public function testDOMParserParseFromStringXml() $p = new DOMParser(); $doc = $p->parseFromString('', $type); $this->wptAssertTrue($doc instanceof Document, 'Should be Document'); - checkMetadata($doc, $type); + $this->checkMetadata($doc, $type); $this->wptAssertEquals($doc->documentElement->namespaceURI, null); $this->wptAssertEquals($doc->documentElement->localName, 'foo'); $this->wptAssertEquals($doc->documentElement->tagName, 'foo'); @@ -42,7 +43,7 @@ public function testDOMParserParseFromStringXml() $this->assertTest(function () use(&$type) { $p = new DOMParser(); $doc = $p->parseFromString('', $type); - checkMetadata($doc, $type); + $this->checkMetadata($doc, $type); $this->wptAssertEquals($doc->documentElement->namespaceURI, 'http://www.mozilla.org/newlayout/xml/parsererror.xml'); $this->wptAssertEquals($doc->documentElement->localName, 'parsererror'); $this->wptAssertEquals($doc->documentElement->tagName, 'parsererror'); diff --git a/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php b/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php index abc5bda..b75ebd8 100644 --- a/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php +++ b/tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php @@ -5,6 +5,7 @@ use Wikimedia\Dodo\Element; use Wikimedia\Dodo\Attr; use Wikimedia\Dodo\DOMParser; +use Wikimedia\Dodo\XMLSerializer; use Wikimedia\Dodo\Tests\Harness\WPTTestHarness; // @see vendor/web-platform-tests/wpt/domparsing/XMLSerializer-serializeToString.html. class XMLSerializerSerializeToStringTest extends WPTTestHarness @@ -28,33 +29,33 @@ public function testXMLSerializerSerializeToString() $this->doc = $this->loadHtmlFile('vendor/web-platform-tests/wpt/domparsing/XMLSerializer-serializeToString.html'); $XMLNS_URI = 'http://www.w3.org/2000/xmlns/'; $this->assertTest(function () { - $root = createXmlDoc()->documentElement; - $this->wptAssertEquals(serialize($root), 'value1'); + $root = $this->createXmlDoc()->documentElement; + $this->wptAssertEquals($this->serialize($root), 'value1'); }, 'check XMLSerializer.serializeToString method could parsing xmldoc to string'); $this->assertTest(function () { - $root = parse('
'); - $this->wptAssertEquals(serialize($root->ownerDocument), '
'); + $root = $this->parse('
'); + $this->wptAssertEquals($this->serialize($root->ownerDocument), '
'); }, 'check XMLSerializer.serializeToString method could parsing document to string'); $this->assertTest(function () { - $root = createXmlDoc()->documentElement; + $root = $this->createXmlDoc()->documentElement; $element = $root->ownerDocument->createElementNS('urn:foo', 'another'); $child1 = $root->firstChild; $root->replaceChild($element, $child1); $element->appendChild($child1); - $this->wptAssertEquals(serialize($root), 'value1'); + $this->wptAssertEquals($this->serialize($root), 'value1'); }, 'Check if the default namespace is correctly reset.'); $this->assertTest(function () { - $root = parse('value1'); - $this->wptAssertEquals(serialize($root), 'value1'); + $root = $this->parse('value1'); + $this->wptAssertEquals($this->serialize($root), 'value1'); }, 'Check if there is no redundant empty namespace declaration.'); // https://github.com/w3c/DOM-Parsing/issues/47 $this->assertTest(function () { - $this->wptAssertEquals(serialize(parse('')), ''); - $this->wptAssertEquals(serialize(parse('')), ''); - $this->wptAssertEquals(serialize(parse('')), ''); + $this->wptAssertEquals($this->serialize($this->parse('')), ''); + $this->wptAssertEquals($this->serialize($this->parse('')), ''); + $this->wptAssertEquals($this->serialize($this->parse('')), ''); }, 'Check if redundant xmlns="..." is dropped.'); $this->assertTest(function () use(&$XMLNS_URI) { - $root = parse(''); + $root = $this->parse(''); $child = $root->ownerDocument->createElement('child'); $child->setAttributeNS($XMLNS_URI, 'xmlns', 'FAIL1'); $root->appendChild($child); @@ -70,121 +71,121 @@ public function testXMLSerializerSerializeToString() $child5 = $root->ownerDocument->createElement('child5'); $child5->setAttributeNS($XMLNS_URI, 'xmlns', ''); $root->appendChild($child5); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if inconsistent xmlns="..." is dropped.'); $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->setAttributeNS('uri', 'name', 'v'); - $this->wptAssertEquals(serialize($root), ''); - $root2 = parse(''); + $this->wptAssertEquals($this->serialize($root), ''); + $root2 = $this->parse(''); $child = $root2->firstChild; $child->setAttributeNS('uri', 'name', 'v'); - $this->wptAssertEquals(serialize($root2), ''); - $root3 = parse(''); + $this->wptAssertEquals($this->serialize($root2), ''); + $root3 = $this->parse(''); $child3 = $root3->firstChild; $child3->setAttributeNS('uri', 'name', 'v'); - $this->wptAssertEquals(serialize($root3), '', 'Should choose the nearest prefix'); + $this->wptAssertEquals($this->serialize($root3), '', 'Should choose the nearest prefix'); }, 'Check if an attribute with namespace and no prefix is serialized with the nearest-declared prefix'); // https://github.com/w3c/DOM-Parsing/issues/45 $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->firstChild->setAttributeNS('u1', 'name', 'v'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if an attribute with namespace and no prefix is serialized with the nearest-declared prefix even if the prefix is assigned to another namespace.'); $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->setAttributeNS('uri', 'p:name', 'v'); - $this->wptAssertEquals(serialize($root), ''); - $root2 = parse(''); + $this->wptAssertEquals($this->serialize($root), ''); + $root2 = $this->parse(''); $child = $root2->firstChild; $child->setAttributeNS('uri', 'p:name', 'value'); - $this->wptAssertEquals(serialize($root2), ''); + $this->wptAssertEquals($this->serialize($root2), ''); }, 'Check if the prefix of an attribute is replaced with another existing prefix mapped to the same namespace URI.'); // https://github.com/w3c/DOM-Parsing/issues/29 $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->setAttributeNS('uri2', 'p:name', 'value'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if the prefix of an attribute is NOT preserved in a case where neither its prefix nor its namespace URI is not already used.'); $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->setAttributeNS('uri2', 'xx:name', 'value'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if the prefix of an attribute is replaced with a generated one in a case where the prefix is already mapped to a different namespace URI.'); $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->setAttribute('attr', "\t"); - $this->wptAssertInArray(serialize($root), ['', '']); + $this->wptAssertInArray($this->serialize($root), ['', '']); $root->setAttribute('attr', "\n"); - $this->wptAssertInArray(serialize($root), ['', '']); + $this->wptAssertInArray($this->serialize($root), ['', '']); $root->setAttribute('attr', "\r"); - $this->wptAssertInArray(serialize($root), ['', '']); + $this->wptAssertInArray($this->serialize($root), ['', '']); }, 'check XMLSerializer.serializeToString escapes attribute values for roundtripping'); $this->assertTest(function () use(&$XMLNS_URI) { $root = (new Document())->createElement('root'); $root->setAttributeNS('uri1', 'p:foobar', 'value1'); $root->setAttributeNS($XMLNS_URI, 'xmlns:p', 'uri2'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if attribute serialization takes into account of following xmlns:* attributes'); $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->firstChild->setAttributeNS('uri2', 'p:foobar', 'v'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if attribute serialization takes into account of the same prefix declared in an ancestor element'); $this->assertTest(function () { - $this->wptAssertEquals(serialize(parse('')), ''); - $this->wptAssertEquals(serialize(parse('')), ''); + $this->wptAssertEquals($this->serialize($this->parse('')), ''); + $this->wptAssertEquals($this->serialize($this->parse('')), ''); }, 'Check if start tag serialization drops element prefix if the namespace is same as inherited default namespace.'); $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $child2 = $root->ownerDocument->createElementNS('u1', 'child2'); $root->firstChild->appendChild($child2); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if start tag serialization finds an appropriate prefix.'); $this->assertTest(function () use(&$XMLNS_URI) { $root = (new Document())->createElementNS('uri1', 'p:root'); $root->setAttributeNS($XMLNS_URI, 'xmlns:p', 'uri2'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if start tag serialization takes into account of its xmlns:* attributes'); $this->assertTest(function () use(&$XMLNS_URI) { $root = (new Document())->createElement('root'); $root->setAttributeNS($XMLNS_URI, 'xmlns:p', 'uri2'); $child = $root->ownerDocument->createElementNS('uri1', 'p:child'); $root->appendChild($child); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if start tag serialization applied the original prefix even if it is declared in an ancestor element.'); // https://github.com/w3c/DOM-Parsing/issues/52 $this->assertTest(function () { - $this->wptAssertEquals(serialize(parse('
')), ''); + $this->wptAssertEquals($this->serialize($this->parse('
')), ''); }, 'Check if start tag serialization does NOT apply the default namespace if its namespace is declared in an ancestor.'); $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->firstChild->setAttributeNS('uri1', 'attr1', 'value1'); $root->firstChild->setAttributeNS('uri2', 'attr2', 'value2'); $root->lastChild->setAttributeNS('uri3', 'attr3', 'value3'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if generated prefixes match to "ns${index}".'); // https://github.com/w3c/DOM-Parsing/issues/44 // According to 'DOM Parsing and Serialization' draft as of 2018-12-11, // 'generate a prefix' result can conflict with an existing xmlns:ns* declaration. $this->assertTest(function () { - $root = parse(''); + $root = $this->parse(''); $root->firstChild->setAttributeNS('uri3', 'attr1', 'value1'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); }, 'Check if "ns1" is generated even if the element already has xmlns:ns1.'); $this->assertTest(function () { $root = (new Document())->createElement('root'); $root->setAttributeNS('http://www.w3.org/1999/xlink', 'href', 'v'); - $this->wptAssertEquals(serialize($root), ''); + $this->wptAssertEquals($this->serialize($root), ''); $root2 = (new Document())->createElement('root'); $root2->setAttributeNS('http://www.w3.org/1999/xlink', 'xl:type', 'v'); - $this->wptAssertEquals(serialize($root2), ''); + $this->wptAssertEquals($this->serialize($root2), ''); }, 'Check if no special handling for XLink namespace unlike HTML serializer.'); $this->assertTest(function () { $root = new DocumentFragment($this->doc); $root->append($this->doc->createElement('div')); $root->append($this->doc->createElement('span')); - $this->wptAssertEquals(serialize($root), '
'); + $this->wptAssertEquals($this->serialize($root), '
'); }, 'Check if document fragment serializes.'); } } diff --git a/tests/logs/errors.yaml b/tests/logs/errors.yaml index efef835..f251c97 100644 --- a/tests/logs/errors.yaml +++ b/tests/logs/errors.yaml @@ -288,11 +288,6 @@ _comment: '' testcases: testHcNodereplacechildnewchildexists files: /tests/W3C/Level1/Core/HcNodereplacechildnewchildexistsTest.php -'Error: Call to undefined function Wikimedia\Dodo\Tests\WPT\Domparsing\createXmlDoc()': - _total: 1 - _comment: '' - testcases: testXMLSerializerSerializeToString - files: /tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php 'Error: Call to undefined function Wikimedia\Dodo\Tests\WPT\Domparsing\testIsChild()': _total: 1 _comment: '' @@ -459,6 +454,11 @@ _comment: '' testcases: testDocumentConstructor files: /tests/WPT/Dom/Nodes/DocumentConstructorTest.php +'Undefined property via __get(): readyState in /src/Document.php on line 69': + _total: 1 + _comment: '' + testcases: testXmldomparser + files: /tests/WPT/Domparsing/XmldomparserTest.php 'Undefined property via __get(): role in /tests/WPT/Dom/Nodes/AriaAttributeReflectionTentativeTest.php on line 15': _total: 1 _comment: '' @@ -642,22 +642,6 @@ _comment: '' testcases: testDocumentCreateTextNode files: /tests/WPT/Dom/Nodes/DocumentCreateTextNodeTest.php -'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\DOMParser::parseFromString( ''application/xhtml+xml'' )': - _total: 1 - _comment: '' - testcases: testDOMParserParseFromStringXmlDoctype - files: /tests/WPT/Domparsing/DOMParserParseFromStringXmlDoctypeTest.php -'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\DOMParser::parseFromString( ''text/xml'' )': - _total: 3 - _comment: '' - testcases: |- - testDOMParserParseFromStringXml - testNodeNormalize - testXmldomparser - files: |- - /tests/WPT/Dom/Nodes/NodeNormalizeTest.php - /tests/WPT/Domparsing/DOMParserParseFromStringXmlTest.php - /tests/WPT/Domparsing/XmldomparserTest.php 'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Document::_createElementNS': _total: 7 _comment: '' @@ -682,11 +666,6 @@ _comment: '' testcases: testAppendOnDocument files: /tests/WPT/Dom/Nodes/AppendOnDocumentTest.php -'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Document::createCDATASection': - _total: 1 - _comment: '' - testcases: testDocumentCreateCDATASection - files: /tests/WPT/Dom/Nodes/DocumentCreateCDATASectionTest.php 'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Document::createTreeWalker': _total: 7 _comment: '' @@ -1129,6 +1108,11 @@ /tests/WPT/Dom/Collections/HTMLCollectionEmptyNameTest.php /tests/WPT/Dom/Collections/HTMLCollectionIteratorTest.php /tests/WPT/Dom/Collections/HTMLCollectionOwnPropsTest.php +'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Node::getBaseURI': + _total: 1 + _comment: '' + testcases: testDOMParserParseFromStringXml + files: /tests/WPT/Domparsing/DOMParserParseFromStringXmlTest.php 'Wikimedia\Dodo\Internal\UnimplementedException: Wikimedia\Dodo\Text::getWholeText': _total: 1 _comment: '' diff --git a/tests/logs/failures.yml b/tests/logs/failures.yml index 16e6aed..9823abd 100644 --- a/tests/logs/failures.yml +++ b/tests/logs/failures.yml @@ -27,10 +27,16 @@ testcases: Wikimedia\Dodo\Tests\WPT\Dom\Nodes\DocumentFragmentGetElementByIdTest files: /tests/WPT/Dom/Nodes/DocumentFragmentGetElementByIdTest.php 'Failed asserting that two strings are equal.': - _total: 1 + _total: 3 _comment: '' - testcases: Wikimedia\Dodo\Tests\WPT\Domparsing\DOMParserParseFromStringHtmlTest - files: /tests/WPT/Domparsing/DOMParserParseFromStringHtmlTest.php + testcases: |- + Wikimedia\Dodo\Tests\DodoTest + Wikimedia\Dodo\Tests\WPT\Domparsing\DOMParserParseFromStringHtmlTest + Wikimedia\Dodo\Tests\WPT\Domparsing\XMLSerializerSerializeToStringTest + files: |- + /tests/DodoTest.php + /tests/WPT/Domparsing/DOMParserParseFromStringHtmlTest.php + /tests/WPT/Domparsing/XMLSerializerSerializeToStringTest.php 'HTMLCollection-Failed asserting that false is true.': _total: 1 _comment: '' diff --git a/tools/TestsGenerator/ParserTask.php b/tools/TestsGenerator/ParserTask.php index c0a9e0c..bc816bb 100644 --- a/tools/TestsGenerator/ParserTask.php +++ b/tools/TestsGenerator/ParserTask.php @@ -433,7 +433,9 @@ protected function removeW3CDisparity() : void { * Removes disparity after js2php. */ protected function preProcessWPTTest() : void { - $find_replace = [ '$document' => '$this->doc', + $find_replace = [ + '$document::URL' => '$this->doc->URL', + '$document' => '$this->doc', '= create(' => '= $this->create(', '$TypeError' => '$this->type_error', 'Object::keys( $testExtensions )->' => '$testExtensions->', @@ -722,6 +724,10 @@ public function leaveNode( $node ) { 'myCloneContents', 'nestRangeInOuterContainer', 'restoreIframe', + 'createXmlDoc', + 'parse', + 'serialize', + 'checkMetadata', // These are helper functions defined in classes // which unfortunately begin with 'test' and so // PHPUnit will think that they are standalone @@ -872,6 +878,7 @@ private function getUseStmts( $extraUses ) : array { 'HTMLElement' => 'Wikimedia\Dodo\HTMLElement', 'NodeFilter' => 'Wikimedia\Dodo\NodeFilter', 'new Document' => 'Wikimedia\Dodo\Document', + 'instanceof Document' => 'Wikimedia\Dodo\Document', 'XMLDocument' => 'Wikimedia\IDLeDOM\XMLDocument', 'Element' => 'Wikimedia\Dodo\Element', 'Attr' => 'Wikimedia\Dodo\Attr', @@ -897,18 +904,23 @@ private function getUseStmts( $extraUses ) : array { 'DOMException' => 'Wikimedia\Dodo\DOMException', 'DOMImplementation' => 'Wikimedia\Dodo\DOMImplementation', 'DOMParser' => 'Wikimedia\Dodo\DOMParser', + 'XMLSerializer' => 'Wikimedia\Dodo\XMLSerializer', 'Range' => 'Wikimedia\Dodo\Range', 'AbstractRange' => 'Wikimedia\Dodo\AbstractRange', 'StaticRange' => 'Wikimedia\Dodo\StaticRange', 'Common' => 'Wikimedia\Dodo\Tests\Harness\Utils\Common', ]; + $notAdded = []; foreach ( $list_ns as $use => $namespace ) { if ( strpos( $this->test, $use ) !== false || ( $extraUses[$use] ?? false ) !== false ) { - $stmts[] = $this->factory->use( $namespace ); + if ( $notAdded[$namespace] ?? true ) { + $stmts[] = $this->factory->use( $namespace ); + $notAdded[$namespace] = false; + } } }