Skip to content

Commit

Permalink
Scrub body_data params too (e.g. POSTed JSON)
Browse files Browse the repository at this point in the history
If we have `$c->req->body_data` - for e.g. the request was a POST with
a JSON body which Catalyst has decoded into `$c->req->body_data` - then
scrub HTML in there too (but applying the same `ignore_params` checks so
that you can exempt certain JSON body params from scrubbing).

Also moved the ignore_params tests into t/03_params.t, and added the
tests for this new feature there too - don't need so many individual
test apps, when most features can be tested with a single test app.
  • Loading branch information
bigpresh committed Aug 11, 2023
1 parent 6ac9a8e commit 2f85e05
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 119 deletions.
93 changes: 70 additions & 23 deletions lib/Catalyst/Plugin/HTML/Scrubber.pm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
package Catalyst::Plugin::HTML::Scrubber;

$Catalyst::Plugin::HTML::Scrubber::VERSION = '0.04';
use Moose;
use namespace::autoclean;

Expand Down Expand Up @@ -47,33 +47,80 @@ sub prepare_parameters {
sub html_scrub {
my ($c, $conf) = @_;

param:
for my $param (keys %{ $c->request->{parameters} }) {
#while (my ($param, $value) = each %{ $c->request->{parameters} }) {
my $value = \$c->request->{parameters}{$param};
if (ref $$value && ref $$value ne 'ARRAY') {
next param;
}
# If there's body_data - for e.g. a POSTed JSON body that was decoded -
# then we need to walk through it, scrubbing as appropriate
if (my $body_data = $c->request->body_data) {
$c->_scrub_recurse($conf, $c->request->body_data);
}

# Normal query/POST body parameters:
$c->_scrub_recurse($conf, $c->request->parameters);

}

# Recursively scrub param values...
sub _scrub_recurse {
my ($c, $conf, $data) = @_;

# If the thing we've got is a hashref, walk over its keys, checking
# whether we should ignore, otherwise, do the needful
if (ref $data eq 'HASH') {
for my $key (keys %$data) {
if (!$c->_should_scrub_param($conf, $key)) {
next;
}

# If we only want to operate on certain params, do that checking
# now...
if ($conf && $conf->{ignore_params}) {
my $ignore_params = $c->config->{scrubber}{ignore_params};
if (ref $ignore_params ne 'ARRAY') {
$ignore_params = [ $ignore_params ];
# OK, it's fine to fettle with this key - if its value is
# a ref, recurse, otherwise, scrub
if (my $ref = ref $data->{$key}) {
$c->_scrub_recurse($conf, $data->{$key});
} else {
# Alright, non-ref value, so scrub it
# FIXME why did we have to have this ref-ref handling fun?
#$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
$data->{$key} = $c->_scrubber->scrub($data->{$key});
}
for my $ignore_param (@$ignore_params) {
if (ref $ignore_param eq 'Regexp') {
next param if $param =~ $ignore_param;
} else {
next param if $param eq $ignore_param;
}
}
} elsif (ref $data eq 'ARRAY') {
# Simple - scrub all the values
$_ = $c->_scrubber->scrub($_) for @$data;
for (@$data) {
if (ref $_) {
$c->_scrub_recurse($conf, $_);
} else {
$_ = $c->_scrubber->scrub($_);
}
}
}
} elsif (ref $data eq 'CODE') {
$c->log->debug("Can't scrub a coderef!");
} else {
# This shouldn't happen, as we should always start with a ref,
# and non-ref hash/array values should have been handled above.
$c->log->debug("Non-ref to scrub - should this happen?");
}
}

# If we're still here, we want to scrub this param's value.
$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
sub _should_scrub_param {
my ($c, $conf, $param) = @_;
# If we only want to operate on certain params, do that checking
# now...
if ($conf && $conf->{ignore_params}) {
my $ignore_params = $c->config->{scrubber}{ignore_params};
if (ref $ignore_params ne 'ARRAY') {
$ignore_params = [ $ignore_params ];
}
for my $ignore_param (@$ignore_params) {
if (ref $ignore_param eq 'Regexp') {
return if $param =~ $ignore_param;
} else {
return if $param eq $ignore_param;
}
}
}

# If we've not bailed above, we didn't match any ignore_params
# entries, or didn't have any, so we do want to scrub
return 1;
}

__PACKAGE__->meta->make_immutable;
Expand Down
73 changes: 70 additions & 3 deletions t/03_params.t
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,86 @@ use Test::More;
my $req = POST('/', [foo => 'bar']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), 'bar', 'parameter ok');
is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone');
}
{
my $req = POST('/', [foo => 'bar<script>alert("0");</script>']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), 'bar');
is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param');
}
{
# we allow <b> in the test app config so this should not be stripped
my $req = POST('/', [foo => '<b>bar</b>']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), '<b>bar</b>', 'parameter ok');
is($c->req->param('foo'), '<b>bar</b>', 'Allowed tag not stripped');
}
{
diag "HTML left alone in ignored field - by regex match";
my $value = '<h1>Bar</h1><p>Foo</p>';
my $req = POST('/', [foo_html => $value]);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is(
$c->req->param('foo_html'),
$value,
'HTML left alone in ignored (by regex) field',
);
}
{
diag "HTML left alone in ignored field - by name";
my $value = '<h1>Bar</h1><p>Foo</p>';
my $req = POST('/', [ignored_param => $value]);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is(
$c->req->param('ignored_param'),
$value,
'HTML left alone in ignored (by name) field',
);
}

{
# Test that data in a JSON body POSTed gets scrubbed too
my $json_body = <<JSON;
{
"foo": "Top-level <img src=foo.jpg title=fun>",
"baz":{
"one":"Second-level <img src=test.jpg>"
},
"arr": [
"one test <img src=arrtest1.jpg>",
"two <script>window.alert('XSS!');</script>"
],
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
}
JSON
my $req = POST('/',
Content_Type => 'application/json', Content => $json_body
);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is(
$c->req->body_data->{foo},
'Top-level ', # note trailing space where img was removed
'Top level body param scrubbed',
);
is(
$c->req->body_data->{baz}{one},
'Second-level ',
'Second level body param scrubbed',
);
is(
$c->req->body_data->{arr}[0],
'one test ',
'Second level array contents scrubbbed',
);
is(
$c->req->body_data->{some_html},
'Leave <b>this</b> alone: <img src=allowed.gif>',
'Body data param matching ignore_params left alone',
);
}

done_testing();
Expand Down
53 changes: 0 additions & 53 deletions t/05_ignore_params.t

This file was deleted.

11 changes: 10 additions & 1 deletion t/lib/MyApp03.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,17 @@ extends 'Catalyst';

__PACKAGE__->config(
name => 'MyApp03',
scrubber => [allow => [qw/br hr b/],]
scrubber => {

auto => 1,

ignore_params => [ qr/_html$/, 'ignored_param' ],

# params for HTML::Scrubber
params => [
allow => [qw/br hr b/],
],
}
);
__PACKAGE__->setup();

Expand Down
22 changes: 0 additions & 22 deletions t/lib/MyApp05.pm

This file was deleted.

17 changes: 0 additions & 17 deletions t/lib/MyApp05/Controller/Root.pm

This file was deleted.

0 comments on commit 2f85e05

Please sign in to comment.