Skip to content

Commit

Permalink
LibWeb: Bring fragment parsing up to spec
Browse files Browse the repository at this point in the history
Corresponds to whatwg/html#10874

Also, parse_fragment() returns ExceptionOr, so stop voiding the error
from append_child().
  • Loading branch information
AtkinsSJ authored and gmta committed Jan 7, 2025
1 parent 7d502df commit 89296b8
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 29 deletions.
11 changes: 5 additions & 6 deletions Libraries/LibWeb/DOM/Element.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1545,18 +1545,17 @@ WebIDL::ExceptionOr<GC::Ref<DOM::DocumentFragment>> Element::parse_fragment(Stri
dbgln("FIXME: Handle fragment parsing of XML documents");
}

// 3. Let new children be the result of invoking algorithm given markup, with context set to context.
// 3. Let newChildren be the result of invoking algorithm given context and markup.
auto new_children = algorithm(*this, markup, HTML::HTMLParser::AllowDeclarativeShadowRoots::No);

// 4. Let fragment be a new DocumentFragment whose node document is context's node document.
auto fragment = realm().create<DOM::DocumentFragment>(document());

// 5. Append each Node in new children to fragment (in tree order).
for (auto& child : new_children) {
// I don't know if this can throw here, but let's be safe.
(void)TRY(fragment->append_child(*child));
}
// 5. For each node of newChildren, in tree order: append node to fragment.
for (auto& child : new_children)
TRY(fragment->append_child(*child));

// 6. Return fragment.
return fragment;
}

Expand Down
48 changes: 25 additions & 23 deletions Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4453,29 +4453,32 @@ DOM::Document& HTMLParser::document()
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
Vector<GC::Root<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots)
{
// 1. Create a new Document node, and mark it as being an HTML document.
// 1. Let document be a Document node whose type is "html".
auto temp_document = DOM::Document::create_for_fragment_parsing(context_element.realm());
temp_document->set_document_type(DOM::Document::Type::HTML);

// AD-HOC: We set the about base URL of the document to the same as the context element's document.
// This is required for Document::parse_url() to work inside iframe srcdoc documents.
temp_document->set_about_base_url(context_element.document().about_base_url());

// 2. If the node document of the context element is in quirks mode, then let the Document be in quirks mode.
// Otherwise, the node document of the context element is in limited-quirks mode, then let the Document be in limited-quirks mode.
// Otherwise, leave the Document in no-quirks mode.
temp_document->set_quirks_mode(context_element.document().mode());
// 2. If context's node document is in quirks mode, then set document's mode to "quirks".
if (context_element.document().in_quirks_mode())
temp_document->set_quirks_mode(DOM::QuirksMode::Yes);

// 3. If allowDeclarativeShadowRoots is true, then set Document's allow declarative shadow roots to true.
// 3. Otherwise, if context's node document is in limited-quirks mode, then set document's mode to "limited-quirks".
else if (context_element.document().in_limited_quirks_mode())
temp_document->set_quirks_mode(DOM::QuirksMode::Limited);

// 4. If allowDeclarativeShadowRoots is true, then set document's allow declarative shadow roots to true.
if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes)
temp_document->set_allow_declarative_shadow_roots(true);

// 4. Create a new HTML parser, and associate it with the just created Document node.
// 5. Create a new HTML parser, and associate it with document.
auto parser = HTMLParser::create(*temp_document, markup, "utf-8"sv);
parser->m_context_element = context_element;
parser->m_parsing_fragment = true;

// 5. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
// 6. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
// - title
// - textarea
if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
Expand Down Expand Up @@ -4512,37 +4515,36 @@ Vector<GC::Root<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& contex
// Leave the tokenizer in the data state.
}

// 6. Let root be a new html element with no attributes.
auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
// 7. Let root be the result of creating an element given document, "html", and the HTML namespace.
auto root = MUST(create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML));

// 7. Append the element root to the Document node created above.
// 8. Append root to document.
MUST(temp_document->append_child(root));

// 8. Set up the parser's stack of open elements so that it contains just the single element root.
// 9. Set up the HTML parser's stack of open elements so that it contains just the single element root.
parser->m_stack_of_open_elements.push(root);

// 9. If the context element is a template element,
if (context_element.local_name() == HTML::TagNames::template_) {
// push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode.
// 10. If context is a template element, then push "in template" onto the stack of template insertion modes
// so that it is the new current template insertion mode.
if (context_element.local_name() == HTML::TagNames::template_)
parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
}

// FIXME: 10. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
// Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point.
// FIXME: 11. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
// Let this start tag token be the start tag token of context; e.g. for the purposes of determining if it is an HTML integration point.

// 11. Reset the parser's insertion mode appropriately.
// 12. Reset the parser's insertion mode appropriately.
parser->reset_the_insertion_mode_appropriately();

// 12. Set the parser's form element pointer to the nearest node to the context element that is a form element
// 13. Set the HTML parser's form element pointer to the nearest node to context that is a form element
// (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
// (If there is no such form element, the form element pointer keeps its initial value, null.)
parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();

// 13. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
// 14. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
// 14. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
// 15. Start the HTML parser and let it run until it has consumed all the characters just inserted into the input stream.
parser->run(context_element.document().url());

// 15. Return the child nodes of root, in tree order.
// 16. Return root's children, in tree order.
Vector<GC::Root<DOM::Node>> children;
while (GC::Ptr<DOM::Node> child = root->first_child()) {
MUST(root->remove_child(*child));
Expand Down

0 comments on commit 89296b8

Please sign in to comment.