From 0a08d644dacdb0bd20fc059025f1ca9c0641f40f Mon Sep 17 00:00:00 2001 From: Beth Skurrie Date: Sat, 10 Dec 2022 15:28:34 +1100 Subject: [PATCH] fix: check that request body does not contain any invalid UTF-8 characters before JSON parsing JSON parse allows characters to be deserialised that cannot be serialised. --- .../api/contracts/publish_contracts_schema.rb | 14 ++++++- .../api/contracts/utf_8_validation.rb | 17 +++++++++ .../api/resources/base_resource.rb | 37 +++++++------------ lib/pact_broker/locale/en.yml | 1 + spec/features/publish_pact_all_in_one_spec.rb | 6 +++ spec/integration/endpoints/non_utf_8_spec.rb | 2 +- .../publish_contracts_schema_spec.rb | 10 +++++ 7 files changed, 62 insertions(+), 25 deletions(-) create mode 100644 lib/pact_broker/api/contracts/utf_8_validation.rb diff --git a/lib/pact_broker/api/contracts/publish_contracts_schema.rb b/lib/pact_broker/api/contracts/publish_contracts_schema.rb index dc135ad56..a8f7f883c 100644 --- a/lib/pact_broker/api/contracts/publish_contracts_schema.rb +++ b/lib/pact_broker/api/contracts/publish_contracts_schema.rb @@ -2,6 +2,7 @@ require "pact_broker/api/contracts/dry_validation_workarounds" require "pact_broker/api/contracts/dry_validation_predicates" require "pact_broker/messages" +require "pact_broker/api/contracts/utf_8_validation" module PactBroker module Api @@ -11,6 +12,10 @@ class PublishContractsSchema using PactBroker::HashRefinements extend PactBroker::Messages + class << self + include PactBroker::Api::Contracts::UTF8Validation + end + SCHEMA = Dry::Validation.Schema do configure do predicates(DryValidationPredicates) @@ -83,6 +88,14 @@ def self.validate_encoding(contract, i, errors) if contract[:decodedContent].nil? add_contract_error(:content, message("errors.base64?", scope: nil), i, errors) end + + if contract[:decodedContent] + char_number, fragment = fragment_before_invalid_utf_8_char(contract[:decodedContent]) + if char_number + error_message = message("errors.non_utf_8_char_in_contract", char_number: char_number, fragment: fragment) + add_contract_error(:content, error_message, i, errors) + end + end end def self.validate_content_matches_content_type(contract, i, errors) @@ -91,7 +104,6 @@ def self.validate_content_matches_content_type(contract, i, errors) end end - def self.add_contract_error(field, message, i, errors) errors[:contracts] ||= {} errors[:contracts][i] ||= {} diff --git a/lib/pact_broker/api/contracts/utf_8_validation.rb b/lib/pact_broker/api/contracts/utf_8_validation.rb new file mode 100644 index 000000000..617bc13e1 --- /dev/null +++ b/lib/pact_broker/api/contracts/utf_8_validation.rb @@ -0,0 +1,17 @@ +module PactBroker + module Api + module Contracts + module UTF8Validation + def fragment_before_invalid_utf_8_char(string) + string.force_encoding('UTF-8').each_char.with_index do | char, index | + if !char.valid_encoding? + fragment = index < 100 ? string[0...index] : string[index-100...index] + return index + 1, fragment + end + end + nil + end + end + end + end +end diff --git a/lib/pact_broker/api/resources/base_resource.rb b/lib/pact_broker/api/resources/base_resource.rb index c20fc69e2..0f6dc1eb4 100644 --- a/lib/pact_broker/api/resources/base_resource.rb +++ b/lib/pact_broker/api/resources/base_resource.rb @@ -11,6 +11,7 @@ require "pact_broker/api/resources/authorization" require "pact_broker/errors" require "pact_broker/api/resources/error_handling_methods" +require "pact_broker/api/contracts/utf_8_validation" module PactBroker module Api @@ -24,6 +25,7 @@ class BaseResource < Webmachine::Resource include PactBroker::Api::Resources::Authentication include PactBroker::Api::Resources::Authorization include PactBroker::Api::Resources::ErrorHandlingMethods + include PactBroker::Api::Contracts::UTF8Validation include PactBroker::Logging @@ -124,25 +126,10 @@ def params(options = {}) @params_with_string_keys ||= JSON.parse(request_body, { symbolize_names: false }.merge(PACT_PARSING_OPTIONS)) #Not load! Otherwise it will try to load Ruby classes. end rescue StandardError => e - fragment = fragment_before_invalid_utf_8_char - - if fragment - raise NonUTF8CharacterFound.new(message("errors.non_utf_8_char_in_request_body", char_number: fragment.length + 1, fragment: fragment)) - else - raise InvalidJsonError.new(e.message) - end + raise InvalidJsonError.new(e.message) end # rubocop: enable Metrics/CyclomaticComplexity - def fragment_before_invalid_utf_8_char - request_body.each_char.with_index do | char, index | - if !char.valid_encoding? - return index < 100 ? request_body[0...index] : request_body[index-100...index] - end - end - nil - end - def params_with_string_keys params(symbolize_names: false) end @@ -193,13 +180,17 @@ def pacticipant_specified? def invalid_json? begin - params - false - rescue NonUTF8CharacterFound => e - logger.info(e.message) # Don't use the default SemanticLogger error logging method because it will try and print out the cause which will contain non UTF-8 chars in the message - set_json_error_message(e.message) - response.headers["Content-Type"] = error_response_content_type - true + char_number, fragment = fragment_before_invalid_utf_8_char(request_body) + if char_number + error_message = message("errors.non_utf_8_char_in_request_body", char_number: char_number, fragment: fragment) + logger.info(error_message) + set_json_error_message(error_message) + response.headers["Content-Type"] = error_response_content_type + true + else + params + false + end rescue StandardError => e message = "#{e.cause ? e.cause.class.name : e.class.name} - #{e.message}" logger.info(message) diff --git a/lib/pact_broker/locale/en.yml b/lib/pact_broker/locale/en.yml index 7f27e6ddb..7d188bea3 100644 --- a/lib/pact_broker/locale/en.yml +++ b/lib/pact_broker/locale/en.yml @@ -109,6 +109,7 @@ en: new_line_in_url_path: URL path cannot contain a new line character. tab_in_url_path: URL path cannot contain a tab character. non_utf_8_char_in_request_body: "Request body has a non UTF-8 character at char %{char_number} and cannot be parsed as JSON. Fragment preceding invalid character is: '%{fragment}'" + non_utf_8_char_in_contract: "Contract has a non UTF-8 character at char %{char_number} and cannot be parsed as JSON. Fragment preceding invalid character is: '%{fragment}'" "400": title: 400 Malformed Request diff --git a/spec/features/publish_pact_all_in_one_spec.rb b/spec/features/publish_pact_all_in_one_spec.rb index 45f0245be..5999862a6 100644 --- a/spec/features/publish_pact_all_in_one_spec.rb +++ b/spec/features/publish_pact_all_in_one_spec.rb @@ -40,6 +40,12 @@ it { is_expected.to be_a_json_error_response("missing") } end + context "with a contract that contains a non utf-8 char" do + let(:contract) { "{\"key\": \"ABCDEFG\x8FDEF\" }" } + + its(:status) { is_expected.to eq 400 } + end + context "with a conflicting pact" do before do allow(PactBroker.configuration).to receive(:allow_dangerous_contract_modification).and_return(false) diff --git a/spec/integration/endpoints/non_utf_8_spec.rb b/spec/integration/endpoints/non_utf_8_spec.rb index 563d147f9..4534b755c 100644 --- a/spec/integration/endpoints/non_utf_8_spec.rb +++ b/spec/integration/endpoints/non_utf_8_spec.rb @@ -20,7 +20,7 @@ end it "truncates the fragement included in the error message" do - expect(JSON.parse(subject.body)).to eq("error" => "Request body has a non UTF-8 character at char 101 and cannot be parsed as JSON. Fragment preceding invalid character is: '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890'") + expect(JSON.parse(subject.body)).to eq("error" => "Request body has a non UTF-8 character at char 111 and cannot be parsed as JSON. Fragment preceding invalid character is: '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890'") end end end diff --git a/spec/lib/pact_broker/api/contracts/publish_contracts_schema_spec.rb b/spec/lib/pact_broker/api/contracts/publish_contracts_schema_spec.rb index 7ef886470..727708c47 100644 --- a/spec/lib/pact_broker/api/contracts/publish_contracts_schema_spec.rb +++ b/spec/lib/pact_broker/api/contracts/publish_contracts_schema_spec.rb @@ -121,6 +121,16 @@ module Contracts expect(subject[:contracts]).to include "specification is missing at index 0" end end + + context "when there is a non UTF-8 character in the base64 decoded contract" do + let(:encoded_contract) { Base64.strict_encode64(decoded_content) } + let(:decoded_content) { "{\"key\": \"ABCDEFG\x8FDEF\" }" } + let(:decoded_parsed_content) { PactBroker::Pacts::Parse.call(decoded_content) } + + it "returns an error" do + expect(subject[:contracts].first).to include "UTF-8 character at char 17" + end + end end end end