Skip to content
This repository has been archived by the owner on Feb 15, 2023. It is now read-only.

Non-recursive tree deletion #392

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/gumbo.h
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,18 @@ struct GumboInternalNode {
} v;
};

/**
 * The type for a tree traversal callback, as accepted by gumbo_tree_traverse.
 *
 * `userdata` is the pointer that was handed to gumbo_tree_traverse, forwarded
 * unchanged; `node` is the node currently being visited. Returning a non-zero
 * value stops the traversal and becomes the return value of
 * gumbo_tree_traverse; returning 0 lets the traversal continue.
 */
typedef size_t (*gumbo_tree_iter_callback)(void* userdata, GumboNode* node);

/**
 * Call `cb` for every node in the subtree rooted at `node` (all descendants,
 * not only the direct children) and finally for `node` itself, passing it
 * the node and `userdata` as arguments.
 *
 * The walk is iterative (no recursion) and post-order: the children of a
 * node are visited, in index order, before the node itself. A node is not
 * dereferenced again once it has been passed to `cb`.
 *
 * Returns the first non-zero value returned by `cb`, in which case the
 * remaining nodes are not visited, or 0 once `node` itself has been visited.
 */
size_t gumbo_tree_traverse(
GumboNode* node, void* userdata, gumbo_tree_iter_callback cb);

/**
* The type for an allocator function. Takes the 'userdata' member of the
* GumboParser struct as its first argument. Semantics should be the same as
Expand Down
149 changes: 99 additions & 50 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "util.h"
#include "vector.h"

#define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
#define AVOID_UNUSED_VARIABLE_WARNING(i) (void) (i)

#define GUMBO_STRING(literal) \
{ literal, sizeof(literal) - 1 }
Expand Down Expand Up @@ -124,12 +124,10 @@ static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
GUMBO_STRING(
"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
"extensions to HTML 4.0//"),
GUMBO_STRING(
"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
"extensions to HTML 4.0//"),
GUMBO_STRING("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
"extensions to HTML 4.0//"),
GUMBO_STRING("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
"extensions to HTML 4.0//"),
GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
Expand Down Expand Up @@ -220,7 +218,8 @@ static const ReplacementEntry kSvgAttributeReplacements[] = {
REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
REPLACEMENT_ENTRY("refx", "refX"),
REPLACEMENT_ENTRY("refy", "refY"),
REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
Expand Down Expand Up @@ -573,9 +572,9 @@ static GumboInsertionMode get_appropriate_insertion_mode(

assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
if (node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML)
return is_last ?
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
return is_last ? GUMBO_INSERTION_MODE_IN_BODY
: GUMBO_INSERTION_MODE_INITIAL;

switch (node->v.element.tag) {
case GUMBO_TAG_SELECT: {
if (is_last) {
Expand Down Expand Up @@ -972,24 +971,24 @@ static void append_comment_node(
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
// Pops nodes via pop_current_node until the current node's tag is one of
// HTML, TR or TEMPLATE; that node itself is left in place.
static void clear_stack_to_table_row_context(GumboParser* parser) {
while (!node_tag_in_set(get_current_node(parser),
(gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
(gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
pop_current_node(parser);
}
}

// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
// Pops nodes via pop_current_node until the current node's tag is one of
// HTML, TABLE or TEMPLATE; that node itself is left in place.
static void clear_stack_to_table_context(GumboParser* parser) {
while (!node_tag_in_set(get_current_node(parser),
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
pop_current_node(parser);
}
}

// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
// Pops nodes via pop_current_node until the current node's tag is one of
// HTML, TBODY, TFOOT, THEAD or TEMPLATE; that node itself is left in place.
void clear_stack_to_table_body_context(GumboParser* parser) {
while (!node_tag_in_set(get_current_node(parser),
(gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
TAG(TEMPLATE)})) {
while (!node_tag_in_set(
get_current_node(parser), (gumbo_tagset){TAG(HTML), TAG(TBODY),
TAG(TFOOT), TAG(THEAD), TAG(TEMPLATE)})) {
pop_current_node(parser);
}
}
Expand Down Expand Up @@ -1486,12 +1485,12 @@ static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
// This is the "generate all implied end tags thoroughly" clause of the spec.
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
// Keeps popping the current node for as long as its tag is in the set listed
// below. All the work happens in the loop condition and step expression, so
// the loop body is intentionally the empty statement.
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
for (
; node_tag_in_set(get_current_node(parser),
(gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
pop_current_node(parser))
for (; node_tag_in_set(get_current_node(parser),
(gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT),
TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT),
TAG(RTC), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD),
TAG(TR)});
pop_current_node(parser))
;
}

Expand Down Expand Up @@ -1958,7 +1957,8 @@ static bool adoption_agency_algorithm(
if (last_node == furthest_block) {
bookmark = formatting_index + 1;
gumbo_debug("Bookmark moved to %d.\n", bookmark);
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
assert((unsigned int) bookmark <=
state->_active_formatting_elements.length);
}
// Step 13.9.
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
Expand Down Expand Up @@ -2018,7 +2018,8 @@ static bool adoption_agency_algorithm(
gumbo_vector_remove_at(
parser, formatting_node_index, &state->_active_formatting_elements);
assert(bookmark >= 0);
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
assert(
(unsigned int) bookmark <= state->_active_formatting_elements.length);
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
&state->_active_formatting_elements);

Expand Down Expand Up @@ -2349,13 +2350,55 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
}
}

static void destroy_node(GumboParser* parser, GumboNode* node) {
// Visits every node of the subtree rooted at `node` in post-order (children
// in index order first, then the node itself) without using recursion, so
// the call-stack depth does not grow with the depth of the tree.
//
// `cb` is invoked as cb(userdata, visited_node). The first non-zero value it
// returns aborts the walk and is returned to the caller; otherwise 0 is
// returned after `node` itself has been visited.
//
// Everything the walk still needs from a node (its parent, its index within
// that parent, and whether it is the traversal root) is captured *before* the
// callback runs. The callback may therefore free the node it is given, as
// destroy_one_node does; the walk never reads the node, or even the value of
// the pointer to it, afterwards.
//
// NOTE(review): the walk climbs back up through `parent` and
// `index_within_parent`, so it presumably requires those fields to be
// consistent with the children vectors for every node below `node` - confirm
// for nodes moved by the adoption agency / foster parenting code.
size_t gumbo_tree_traverse(
    GumboNode* node, void* userdata, gumbo_tree_iter_callback cb) {
  GumboNode* current_node = node;
  // Index of the next child of current_node that still has to be descended
  // into; always 0 when a node is entered from above.
  size_t offset = 0;

  for (;;) {
    // NULL for leaf node types, which have no children vector.
    GumboVector* children = NULL;
    switch (current_node->type) {
      case GUMBO_NODE_DOCUMENT:
        children = &current_node->v.document.children;
        break;
      case GUMBO_NODE_TEMPLATE:
      case GUMBO_NODE_ELEMENT:
        children = &current_node->v.element.children;
        break;
      case GUMBO_NODE_TEXT:
      case GUMBO_NODE_CDATA:
      case GUMBO_NODE_COMMENT:
      case GUMBO_NODE_WHITESPACE:
        assert(0 == offset);
        break;
      default:
        assert(!"Invalid GumboNodeType!");
        abort();
    }

    if (children != NULL && offset < children->length) {
      // Descend into the next unvisited child, starting at its first child.
      current_node = children->data[offset];
      offset = 0;
      continue;
    }
    assert(children == NULL || offset == children->length);

    // All children (if any) are done: visit the node itself. Snapshot what is
    // needed to continue first, because the callback may deallocate the node.
    const bool is_root = (current_node == node);
    GumboNode* const parent_node = current_node->parent;
    // Unsigned arithmetic: an index of (size_t) -1 simply wraps to 0.
    const size_t resume_offset = current_node->index_within_parent + 1;

    const size_t retcode = cb(userdata, current_node);
    if (retcode != 0) {
      return retcode;
    }
    if (is_root) {
      return 0;
    }

    // Climb back to the parent and resume with the sibling after this node.
    current_node = parent_node;
    offset = resume_offset;
  }
}

static size_t destroy_one_node(void* parser_, GumboNode* node) {
GumboParser* parser = (GumboParser*) parser_;
switch (node->type) {
case GUMBO_NODE_DOCUMENT: {
GumboDocument* doc = &node->v.document;
for (unsigned int i = 0; i < doc->children.length; ++i) {
destroy_node(parser, doc->children.data[i]);
}
gumbo_parser_deallocate(parser, (void*) doc->children.data);
gumbo_parser_deallocate(parser, (void*) doc->name);
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
Expand All @@ -2367,9 +2410,6 @@ static void destroy_node(GumboParser* parser, GumboNode* node) {
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
}
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
destroy_node(parser, node->v.element.children.data[i]);
}
gumbo_parser_deallocate(parser, node->v.element.children.data);
break;
case GUMBO_NODE_TEXT:
Expand All @@ -2380,6 +2420,11 @@ static void destroy_node(GumboParser* parser, GumboNode* node) {
break;
}
gumbo_parser_deallocate(parser, node);
return 0;
}

// Releases `node` and everything beneath it by running destroy_one_node over
// the subtree with gumbo_tree_traverse; `parser` is forwarded to the callback
// as its userdata. The traversal's return value is not used.
static void destroy_node(GumboParser* parser, GumboNode* node) {
  void* const callback_data = parser;
  (void) gumbo_tree_traverse(node, callback_data, destroy_one_node);
}

// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inbody
Expand Down Expand Up @@ -2524,8 +2569,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
bool result = maybe_implicitly_close_p_tag(parser, token);
insert_element_from_token(parser, token);
return result;
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
TAG(H4), TAG(H5), TAG(H6)})) {
} else if (tag_in(token, kStartTag,
(gumbo_tagset){
TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6)})) {
bool result = maybe_implicitly_close_p_tag(parser, token);
if (node_tag_in_set(
get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
Expand Down Expand Up @@ -2669,11 +2715,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
}
return implicitly_close_tags(
parser, token, GUMBO_NAMESPACE_HTML, token_tag);
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
TAG(H4), TAG(H5), TAG(H6)})) {
if (!has_an_element_in_scope_with_tagname(
parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
} else if (tag_in(token, kEndTag,
(gumbo_tagset){
TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6)})) {
if (!has_an_element_in_scope_with_tagname(parser, 6,
(GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
GUMBO_TAG_H5, GUMBO_TAG_H6})) {
// No heading open; ignore the token entirely.
parser_add_parse_error(parser, token);
ignore_token(parser);
Expand All @@ -2692,8 +2739,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
do {
current_node = pop_current_node(parser);
} while (!node_tag_in_set(
current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
TAG(H4), TAG(H5), TAG(H6)}));
current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3), TAG(H4),
TAG(H5), TAG(H6)}));
return success;
}
} else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
Expand Down Expand Up @@ -3433,13 +3480,15 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
}
parser->_parser_state->_reprocess_current_token = true;
return close_current_cell(parser, token);
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
} else if (tag_in(token, kEndTag,
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
TAG(COLGROUP), TAG(HTML)})) {
parser_add_parse_error(parser, token);
ignore_token(parser);
return false;
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
} else if (tag_in(token, kEndTag,
(gumbo_tagset){TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
TAG(TR)})) {
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
parser_add_parse_error(parser, token);
ignore_token(parser);
Expand Down Expand Up @@ -3591,9 +3640,9 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
return handle_in_head(parser, token);
} else if (tag_in(
token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
} else if (tag_in(token, kStartTag,
(gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(TBODY),
TAG(TFOOT), TAG(THEAD)})) {
pop_template_insertion_mode(parser);
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
Expand Down Expand Up @@ -3875,9 +3924,9 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
do {
pop_current_node(parser);
} while (!(is_mathml_integration_point(get_current_node(parser)) ||
is_html_integration_point(get_current_node(parser)) ||
get_current_node(parser)->v.element.tag_namespace ==
GUMBO_NAMESPACE_HTML));
is_html_integration_point(get_current_node(parser)) ||
get_current_node(parser)->v.element.tag_namespace ==
GUMBO_NAMESPACE_HTML));
parser->_parser_state->_reprocess_current_token = true;
return false;
}
Expand Down