Skip to content

Commit

Permalink
Merge pull request #3 from bigpresh/bigpresh/scrub_deserialised_body_…
Browse files Browse the repository at this point in the history
…data_params

Scrub body_data / data params too (e.g. POSTed JSON)
  • Loading branch information
bigpresh authored Sep 18, 2023
2 parents 6ac9a8e + 4330290 commit a8b7b5e
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 94 deletions.
130 changes: 106 additions & 24 deletions lib/Catalyst/Plugin/HTML/Scrubber.pm
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sub setup {
return $c->maybe::next::method(@_);
}

sub prepare_parameters {
sub execute {
my $c = shift;

$c->maybe::next::method(@_);
Expand All @@ -47,33 +47,113 @@ sub prepare_parameters {
sub html_scrub {
my ($c, $conf) = @_;

param:
for my $param (keys %{ $c->request->{parameters} }) {
#while (my ($param, $value) = each %{ $c->request->{parameters} }) {
my $value = \$c->request->{parameters}{$param};
if (ref $$value && ref $$value ne 'ARRAY') {
next param;
# If there's body_data - for e.g. a POSTed JSON body that was decoded -
# then we need to walk through it, scrubbing as appropriate
if (my $body_data = $c->request->body_data) {
$c->_scrub_recurse($conf, $c->request->body_data);
}

# And if Catalyst::Controller::REST is in use so we have $req->data,
# then scrub that too
if ($c->request->can('data')) {
my $data = $c->request->data;
if ($data) {
$c->_scrub_recurse($conf, $c->request->data);
}
}

# Normal query/POST body parameters:
$c->_scrub_recurse($conf, $c->request->parameters);

}

# If we only want to operate on certain params, do that checking
# now...
if ($conf && $conf->{ignore_params}) {
my $ignore_params = $c->config->{scrubber}{ignore_params};
if (ref $ignore_params ne 'ARRAY') {
$ignore_params = [ $ignore_params ];
# Recursively scrub param values...
sub _scrub_recurse {
my ($c, $conf, $data) = @_;

# If the thing we've got is a hashref, walk over its keys, checking
# whether we should ignore, otherwise, do the needful
if (ref $data eq 'HASH') {
for my $key (keys %$data) {
if (!$c->_should_scrub_param($conf, $key)) {
next;
}

# OK, it's fine to fettle with this key - if its value is
# a ref, recurse, otherwise, scrub
if (my $ref = ref $data->{$key}) {
$c->_scrub_recurse($conf, $data->{$key});
} else {
# Alright, non-ref value, so scrub it
# FIXME why did we have to have this ref-ref handling fun?
#$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
$data->{$key} = $c->_scrubber->scrub($data->{$key});
}
for my $ignore_param (@$ignore_params) {
if (ref $ignore_param eq 'Regexp') {
next param if $param =~ $ignore_param;
} else {
next param if $param eq $ignore_param;
}
}
} elsif (ref $data eq 'ARRAY') {
# Simple - scrub all the values
$_ = $c->_scrubber->scrub($_) for @$data;
for (@$data) {
if (ref $_) {
$c->_scrub_recurse($conf, $_);
} else {
$_ = $c->_scrubber->scrub($_);
}
}
}
} elsif (ref $data eq 'CODE') {
$c->log->debug("Can't scrub a coderef!");
} else {
# This shouldn't happen, as we should always start with a ref,
# and non-ref hash/array values should have been handled above.
$c->log->debug("Non-ref to scrub - should this happen?");
}
}

# If we're still here, we want to scrub this param's value.
$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
sub _should_scrub_param {
my ($c, $conf, $param) = @_;
# If we only want to operate on certain params, do that checking
# now...
if ($conf && $conf->{ignore_params}) {
my $ignore_params = $c->config->{scrubber}{ignore_params};
if (ref $ignore_params ne 'ARRAY') {
$ignore_params = [ $ignore_params ];
}
for my $ignore_param (@$ignore_params) {
if (ref $ignore_param eq 'Regexp') {
return if $param =~ $ignore_param;
} else {
return if $param eq $ignore_param;
}
}
}

# If we've not bailed above, we didn't match any ignore_params
# entries, or didn't have any, so we do want to scrub
return 1;
}


# Incredibly nasty monkey-patch to rewind filehandle before parsing - see
# https://github.com/perl-catalyst/catalyst-runtime/pull/186
# First, get the default handlers hashref:
my $default_data_handlers = Catalyst->default_data_handlers();

# Wrap the coderef for application/json in one that rewinds the filehandle
# first:
my $orig_json_handler = $default_data_handlers->{'application/json'};
$default_data_handlers->{'application/json'} = sub {
$_[0]->seek(0,0); # rewind $fh arg
$orig_json_handler->(@_);
};


{
# and now replace the original default_data_handlers() with a version that
# returns our modified handlers
no warnings 'redefine';
*Catalyst::default_data_handlers = sub {
return $default_data_handlers;
};
}

__PACKAGE__->meta->make_immutable;
Expand Down Expand Up @@ -124,9 +204,11 @@ See SYNOPSIS for how to configure the plugin, both with its own configuration
passing on any options from L<HTML::Scrubber> to control exactly what
scrubbing happens.
=item prepare_parameters
=item dispatch
Sanitize HTML tags in all parameters (unless `ignore_params` exempts them).
Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) -
this includes normal POST params, and serialised data (e.g. a POSTed JSON body)
accessed via `$c->req->body_data` or `$c->req->data`.
=back
Expand Down
2 changes: 1 addition & 1 deletion t/00_compile.t
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use strict;
use Test::More tests => 1;

BEGIN { use_ok 'Catalyst::Plugin::HTML::Scrubber' }
BEGIN { use Catalyst; use_ok 'Catalyst::Plugin::HTML::Scrubber' }
81 changes: 74 additions & 7 deletions t/03_params.t
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,93 @@ use Test::More;
{
my $req = GET('/');
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($res->code, RC_OK, 'response ok');
is($res->content, 'index', 'content ok');
}
{
my $req = POST('/', [foo => 'bar']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), 'bar', 'parameter ok');
is($res->code, RC_OK, 'response ok');
is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone');
}
{
my $req = POST('/', [foo => 'bar<script>alert("0");</script>']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), 'bar');
is($res->code, RC_OK, 'response ok');
is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param');
}
{
# we allow <b> in the test app config so this should not be stripped
my $req = POST('/', [foo => '<b>bar</b>']);
my ($res, $c) = ctx_request($req);
ok($res->code == RC_OK, 'response ok');
is($c->req->param('foo'), '<b>bar</b>', 'parameter ok');
is($res->code, RC_OK, 'response ok');
is($c->req->param('foo'), '<b>bar</b>', 'Allowed tag not stripped');
}
{
diag "HTML left alone in ignored field - by regex match";
my $value = '<h1>Bar</h1><p>Foo</p>';
my $req = POST('/', [foo_html => $value]);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->param('foo_html'),
$value,
'HTML left alone in ignored (by regex) field',
);
}
{
diag "HTML left alone in ignored field - by name";
my $value = '<h1>Bar</h1><p>Foo</p>';
my $req = POST('/', [ignored_param => $value]);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->param('ignored_param'),
$value,
'HTML left alone in ignored (by name) field',
);
}

{
# Test that data in a JSON body POSTed gets scrubbed too
my $json_body = <<JSON;
{
"foo": "Top-level <img src=foo.jpg title=fun>",
"baz":{
"one":"Second-level <img src=test.jpg>"
},
"arr": [
"one test <img src=arrtest1.jpg>",
"two <script>window.alert('XSS!');</script>"
],
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
}
JSON
my $req = POST('/',
Content_Type => 'application/json', Content => $json_body
);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->body_data->{foo},
'Top-level ', # note trailing space where img was removed
'Top level body param scrubbed',
);
is(
$c->req->body_data->{baz}{one},
'Second-level ',
'Second level body param scrubbed',
);
is(
$c->req->body_data->{arr}[0],
'one test ',
'Second level array contents scrubbbed',
);
is(
$c->req->body_data->{some_html},
'Leave <b>this</b> alone: <img src=allowed.gif>',
'Body data param matching ignore_params left alone',
);
}

done_testing();
Expand Down
53 changes: 0 additions & 53 deletions t/05_ignore_params.t

This file was deleted.

60 changes: 60 additions & 0 deletions t/05_rest.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin/lib";

use Test::More;


eval 'use Catalyst::Controller::REST';
plan skip_all => 'Catalyst::Controller::REST not available, skip REST tests' if $@;

use Catalyst::Test 'MyApp05';
use HTTP::Request::Common;
use HTTP::Status;

{
# Test that data in a JSON body POSTed gets scrubbed too
my $json_body = <<JSON;
{
"foo": "Top-level <img src=foo.jpg title=fun>",
"baz":{
"one":"Second-level <img src=test.jpg>"
},
"arr": [
"one test <img src=arrtest1.jpg>",
"two <script>window.alert('XSS!');</script>"
],
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
}
JSON
my $req = POST('/foo',
Content_Type => 'application/json', Content => $json_body
);
my ($res, $c) = ctx_request($req);
is($res->code, RC_OK, 'response ok');
is(
$c->req->data->{foo},
'Top-level ', # note trailing space where img was removed
'Top level body param scrubbed',
);
is(
$c->req->data->{baz}{one},
'Second-level ',
'Second level body param scrubbed',
);
is(
$c->req->data->{arr}[0],
'one test ',
'Second level array contents scrubbbed',
);
is(
$c->req->data->{some_html},
'Leave <b>this</b> alone: <img src=allowed.gif>',
'Body data param matching ignore_params left alone',
);
}

done_testing();

11 changes: 10 additions & 1 deletion t/lib/MyApp03.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,17 @@ extends 'Catalyst';

__PACKAGE__->config(
name => 'MyApp03',
scrubber => [allow => [qw/br hr b/],]
scrubber => {

auto => 1,

ignore_params => [ qr/_html$/, 'ignored_param' ],

# params for HTML::Scrubber
params => [
allow => [qw/br hr b/],
],
}
);
__PACKAGE__->setup();

Expand Down
Loading

0 comments on commit a8b7b5e

Please sign in to comment.