Skip to content
This repository has been archived by the owner on Feb 15, 2023. It is now read-only.

Don't crash when memory is exhausted #389

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/attribute.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ GumboAttribute* gumbo_get_attribute(

void gumbo_destroy_attribute(
struct GumboInternalParser* parser, GumboAttribute* attribute) {
if (NULL == attribute)
return;
gumbo_parser_deallocate(parser, (void*) attribute->name);
gumbo_parser_deallocate(parser, (void*) attribute->value);
gumbo_parser_deallocate(parser, (void*) attribute);
Expand Down
2 changes: 2 additions & 0 deletions src/error.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ void gumbo_print_caret_diagnostic(
}

void gumbo_error_destroy(GumboParser* parser, GumboError* error) {
if (NULL == error)
return;
if (error->type == GUMBO_ERR_PARSER ||
error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG) {
gumbo_vector_destroy(parser, &error->v.parser.tag_stack);
Expand Down
32 changes: 32 additions & 0 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <setjmp.h>

#include "attribute.h"
#include "error.h"
Expand All @@ -39,6 +40,12 @@
#define TERMINATOR \
{ "", 0 }

#ifndef _WIN32
#define SETJMP(x) (sigsetjmp(x, 0))
#else
#define SETJMP(x) (setjmp(x))
#endif

typedef char gumbo_tagset[GUMBO_TAG_LAST];
#define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
#define TAG_SVG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_SVG)
Expand Down Expand Up @@ -398,6 +405,13 @@ typedef struct GumboInternalParserState {
// flag appropriately.
bool _closed_body_tag;
bool _closed_html_tag;

// Jump buf to jump to on memory exhaustion.
#ifdef _WIN32
jmp_buf oom_buf;
#else
sigjmp_buf oom_buf;
#endif
} GumboParserState;

static bool token_has_attribute(const GumboToken* token, const char* name) {
Expand Down Expand Up @@ -476,6 +490,7 @@ static void output_init(GumboParser* parser) {
static void parser_state_init(GumboParser* parser) {
GumboParserState* parser_state =
gumbo_parser_allocate(parser, sizeof(GumboParserState));
if (NULL == parser_state) return;
parser_state->_insertion_mode = GUMBO_INSERTION_MODE_INITIAL;
parser_state->_reprocess_current_token = false;
parser_state->_frameset_ok = true;
Expand Down Expand Up @@ -2350,6 +2365,8 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
}

static void destroy_node(GumboParser* parser, GumboNode* node) {
if (NULL == node)
return;
switch (node->type) {
case GUMBO_NODE_DOCUMENT: {
GumboDocument* doc = &node->v.document;
Expand Down Expand Up @@ -4075,11 +4092,24 @@ GumboOutput* gumbo_parse(const char* buffer) {
GumboOutput* gumbo_parse_with_options(
const GumboOptions* options, const char* buffer, size_t length) {
GumboParser parser;
memset(&parser, 0, sizeof parser);
parser._options = options;

if (SETJMP(parser._oom_buf)) {
if (parser._parser_state)
parser_state_destroy(&parser);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this actually deallocate all the used memory? What about all the allocations for nodes and errors?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn’t, even if I add a call to gumbo_destroy_output at the end whenever there would be something non-NULL to pass to it.

One option that is guaranteed to work is to place all allocations in a linked list and on error destroy the list. On the other hand, the user can always do that themselves, since Gumbo can call user-provided callbacks.

Copy link
Contributor

@craigbarnes craigbarnes Oct 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was considering using the arena pull request to solve this problem for lua-gumbo. Unfortunately, the state of that pull request as it stands still contains dozens of unchecked calls to malloc and has a few other problems I can't recall off the top of my head.

Gumbo is essentially an abandoned project at this point though. It seems unlikely that any of the newer bug reports will be fixed in this repo.

if (parser._tokenizer_state)
gumbo_tokenizer_state_destroy(&parser);
return NULL;
}

output_init(&parser);
gumbo_tokenizer_state_init(&parser, buffer, length);
parser_state_init(&parser);

if (NULL == parser._parser_state)
return NULL;

if (options->fragment_context != GUMBO_TAG_LAST) {
fragment_parser_init(
&parser, options->fragment_context, options->fragment_namespace);
Expand Down Expand Up @@ -4179,6 +4209,8 @@ void gumbo_destroy_node(GumboOptions* options, GumboNode* node) {
}

void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
if (NULL == output)
return;
// Need a dummy GumboParser because the allocator comes along with the
// options object.
GumboParser parser;
Expand Down
9 changes: 9 additions & 0 deletions src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#ifndef GUMBO_PARSER_H_
#define GUMBO_PARSER_H_

#include <setjmp.h>

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -48,6 +50,13 @@ typedef struct GumboInternalParser {
// The internal parser state. Initialized on parse start and destroyed on
// parse end; end-users will never see a non-garbage value in this pointer.
struct GumboInternalParserState* _parser_state;

// jmp_buf to jump to on out of memory
#ifdef _WIN32
jmp_buf _oom_buf;
#else
sigjmp_buf _oom_buf;
#endif
} GumboParser;

#ifdef __cplusplus
Expand Down
16 changes: 13 additions & 3 deletions src/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
#include "util.h"

#include <assert.h>
#include <setjmp.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <stdarg.h>
#include <stdio.h>

#include "gumbo.h"
#include "parser.h"
Expand All @@ -32,7 +33,16 @@
const GumboSourcePosition kGumboEmptySourcePosition = {0, 0, 0};

void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
return parser->_options->allocator(parser->_options->userdata, num_bytes);
void *v = parser->_options->allocator(parser->_options->userdata, num_bytes);
if (NULL == v) {
#ifndef _WIN32
siglongjmp(parser->_oom_buf, 1);
#else
longjmp(parser->_oom_buf, 1);
#endif
} else {
return memset(v, '\0', num_bytes);
}
}

void gumbo_parser_deallocate(GumboParser* parser, void* ptr) {
Expand Down
26 changes: 26 additions & 0 deletions tests/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,32 @@ TEST_F(GumboParserTest, FragmentWithNamespace) {
EXPECT_EQ(0, GetChildCount(div));
}

TEST_F(GumboParserTest, OutOfMemory) {
GumboOptions options;
memcpy((void*)&options, (void*)&kGumboDefaultOptions, sizeof options);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no need for memcpy() here. You can just use:

GumboOptions options = kGumboDefaultOptions;

struct Count {
size_t count = 0;
size_t count_max = 0;
} count;
options.userdata = (void*)&count;
options.allocator = [](void *userdata, size_t size) {
auto count = (Count*)userdata;
if (count->count) {
count->count--;
return malloc(size);
} else {
count->count = ++count->count_max;
return (void*)nullptr;
}
};
const char buf[] = "<html><head><title>dummy</title></head><body>some text</body></html>";
GumboOutput *output;
do {
output = gumbo_parse_with_options(&options, buf, sizeof buf - 1);
} while (output == nullptr);
gumbo_destroy_output(&options, output);
}

TEST_F(GumboParserTest, FragmentWithTwoNodes) {
ParseFragment("<h1>Hi</h1><br>", GUMBO_TAG_BODY, GUMBO_NAMESPACE_HTML);

Expand Down