diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 988850ce7..0048f5778 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -1398,6 +1398,7 @@ def _outgoing_dedup_hash(self, event):
     def _url(self):
         return self.data["url"]
 
+
     def __str__(self):
         max_event_len = 200
diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py
index 688f9f599..7c9549269 100644
--- a/bbot/core/helpers/misc.py
+++ b/bbot/core/helpers/misc.py
@@ -891,7 +891,7 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
         xml_data (str): XML-formatted string containing elements.
 
     Returns:
-        set: A set of tuples containing the tags and their corresponding text values present in the XML object.
+        set: A set of tuples containing the tags and their corresponding sanitized text values present in the XML object.
 
     Raises:
         Returns an empty set if ParseError occurs.
@@ -913,7 +913,10 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
     while stack:
         current_element = stack.pop()
         if validate_parameter(current_element.tag, compare_mode):
-            tag_value_pairs.add((current_element.tag, current_element.text))
+            # Sanitize the text value
+            text_value = current_element.text.strip() if current_element.text else None
+            sanitized_value = quote(text_value, safe='') if text_value else None
+            tag_value_pairs.add((current_element.tag, sanitized_value))
         for child in current_element:
             stack.append(child)
     return tag_value_pairs
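For reviewers, here is a minimal standalone sketch (not the helper itself) of what the new sanitization in extract_params_xml does to element text containing whitespace or newlines; the function name below is illustrative and simply mirrors the added lines, without the validate_parameter filtering:

from urllib.parse import quote
import xml.etree.ElementTree as ET

def sanitize_xml_text_values(xml_data):
    # Walk the tree and percent-encode each element's stripped text, mirroring the hunk above
    pairs = set()
    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError:
        return pairs
    stack = [root]
    while stack:
        element = stack.pop()
        text_value = element.text.strip() if element.text else None
        sanitized_value = quote(text_value, safe="") if text_value else None
        pairs.add((element.tag, sanitized_value))
        stack.extend(element)
    return pairs

pairs = sanitize_xml_text_values("<data><newlines>invalid\nwith\nnewlines</newlines></data>")
assert ("newlines", "invalid%0Awith%0Anewlines") in pairs

The net effect is that WEB_PARAMETER events built from XML bodies carry percent-encoded original values instead of raw multi-line text, which is what the new excavate test later in this diff asserts.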
diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py
index e007bff03..809b2d6e3 100644
--- a/bbot/core/helpers/regexes.py
+++ b/bbot/core/helpers/regexes.py
@@ -114,10 +114,10 @@
 # For use with excavate parameters extractor
 input_tag_regex = re.compile(
-    r"<input[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w]*)[\"\']?[^>]*?>"
+    r"<input[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w\s]*)[\"\']?[^>]*?>"
 )
 input_tag_regex2 = re.compile(
-    r"<input[^>]*?\svalue=[\"\']?([:\-%\._=+\/\w]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
+    r"<input[^>]*?\svalue=[\"\']?([:\-%\._=+\/\w\s]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
 )
 input_tag_novalue_regex = re.compile(r"<input(?![^>]*\b\svalue=)[^>]*?\sname=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>")
 # jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=")
@@ -169,7 +169,7 @@
 button_tag_regex2 = re.compile(
     r"<button[^>]*?value=[\"\']?([\-%\._=+\/\w]*)[\"\']?[^>]*?name=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
 )
-tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\s\'\"\>]+)[\"\']?[^>]*>")
+tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\'\"\>]+)[\"\']?[^>]*>")
 
 valid_netloc = r"[^\s!@#$%^&()=/?\\'\";~`<>]+"
diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py
index 279b61245..a274b8389 100644
--- a/bbot/modules/internal/excavate.py
+++ b/bbot/modules/internal/excavate.py
@@ -1053,8 +1053,6 @@ async def setup(self):
         return True
 
     async def search(self, data, event, content_type, discovery_context="HTTP response"):
-        # TODO: replace this JSON/XML extraction with our lightfuzz envelope stuff
-
         if not data:
             return None
         decoded_data = await self.helpers.re.recursive_decode(data)
@@ -1065,7 +1063,7 @@ async def search(self, data, event, content_type, discovery_context="HTTP respon
             "json": self.helpers.extract_params_json,
             "xml": self.helpers.extract_params_xml,
         }
-
+
         for source_type, extract_func in extraction_map.items():
             if source_type in content_type_lower:
                 results = extract_func(data)
diff --git a/bbot/modules/lightfuzz_submodules/base.py b/bbot/modules/lightfuzz_submodules/base.py
index b98e75a23..46a64f6c2 100644
--- a/bbot/modules/lightfuzz_submodules/base.py
+++ b/bbot/modules/lightfuzz_submodules/base.py
@@ -1,6 +1,7 @@
 import copy
 import base64
 import binascii
+from urllib.parse import quote
 
 class BaseLightfuzz:
     def __init__(self, lightfuzz, event):
@@ -50,15 +51,24 @@ def additional_params_process(self, additional_params, additional_params_populat
                 new_additional_params[k] = v
         return new_additional_params
 
-    def compare_baseline(
-        self, event_type, probe, cookies, additional_params_populate_empty=False, speculative_mode="GETPARAM"
-    ):
+    def conditional_urlencode(self, probe, event_type, skip_urlencoding=False):
+        """Conditionally url-encodes the probe if the event type requires it and encoding is not skipped by the submodule.
+        We also don't encode if any envelopes are present.
         """
-        Initializes the http_compare object and executes a probe to establish a baseline for comparison.
+        if event_type in ["GETPARAM", "COOKIE"] and not skip_urlencoding and not getattr(self.event, "envelopes", None):
+            # Exclude '&' from being encoded since we are operating on full query strings
+            return quote(probe, safe='&')
+        return probe
 
-        Handles each of the types of WEB_PARAMETERS (GETPARAM, COOKIE, HEADER, POSTPARAM, BODYJSON)
-        """
+    def compare_baseline(
+        self, event_type, probe, cookies, additional_params_populate_empty=False, speculative_mode="GETPARAM", skip_urlencoding=False
+    ):
+
+        # Transparently pack the probe value into the envelopes, if present
         probe = self.outgoing_probe_value(probe)
+
+        # URL Encode the probe if the event type is GETPARAM or COOKIE, if there are no envelopes, and the submodule did not opt-out with skip_urlencoding
+        probe = self.conditional_urlencode(probe, event_type, skip_urlencoding)
 
         http_compare = None
         if event_type == "SPECULATIVE":
@@ -66,6 +76,7 @@ def compare_baseline(
         if event_type == "GETPARAM":
             baseline_url = f"{self.event.data['url']}?{self.event.data['name']}={probe}"
+
             if "additional_params" in self.event.data.keys() and self.event.data["additional_params"] is not None:
                 baseline_url = self.lightfuzz.helpers.add_get_params(
                     baseline_url, self.event.data["additional_params"], encode=False
                 )
@@ -134,13 +145,18 @@ async def compare_probe(
         additional_params_populate_empty=False,
         additional_params_override={},
         speculative_mode="GETPARAM",
+        skip_urlencoding=False,
     ):
-        """
-        Executes a probe to compare against a baseline.
-        """
+
+        # Transparently pack the probe value into the envelopes, if present
         probe = self.outgoing_probe_value(probe)
-        additional_params = copy.deepcopy(self.event.data.get("additional_params", {}))
+
+        # URL Encode the probe if the event type is GETPARAM or COOKIE, if there are no envelopes, and the submodule did not opt-out with skip_urlencoding
+        probe = self.conditional_urlencode(probe, event_type, skip_urlencoding)
+        # Create a complete copy to avoid modifying the original additional_params
+        additional_params = copy.deepcopy(self.event.data.get("additional_params", {}))
+
         if additional_params_override:
             for k, v in additional_params_override.items():
                 additional_params[k] = v
 
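As a sanity check on the new encoding behavior, here is a minimal self-contained sketch of the decision conditional_urlencode implements; the standalone function and its envelopes argument are illustrative stand-ins for the method and self.event.envelopes, not the module code itself:

from urllib.parse import quote

def conditional_urlencode_sketch(probe, event_type, skip_urlencoding=False, envelopes=None):
    # Encode only GETPARAM/COOKIE probes, only when the submodule has not opted out,
    # and only when no envelopes are wrapping the value
    if event_type in ("GETPARAM", "COOKIE") and not skip_urlencoding and not envelopes:
        # '&' stays literal because some probes are full query-string fragments
        return quote(probe, safe="&")
    return probe

assert conditional_urlencode_sketch("trees and forests", "GETPARAM") == "trees%20and%20forests"
assert conditional_urlencode_sketch("cat&dog food", "GETPARAM") == "cat&dog%20food"
assert conditional_urlencode_sketch("1 2", "GETPARAM", skip_urlencoding=True) == "1 2"
assert conditional_urlencode_sketch("1 2", "POSTPARAM") == "1 2"

compare_baseline, compare_probe, and standard_probe all run the probe through this helper right after envelope packing, so submodules that pre-encode their own payloads must pass skip_urlencoding=True to avoid double encoding.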
""" + # Transparently pack the probe value into the envelopes, if present probe = self.outgoing_probe_value(probe) + # URL Encode the probe if the event type is GETPARAM or COOKIE, if there are no envelopes, and the submodule did not opt-out with skip_urlencoding + probe = self.conditional_urlencode(probe, event_type, skip_urlencoding) + if event_type == "SPECULATIVE": event_type = speculative_mode method = "GET" + if event_type == "GETPARAM": url = f"{self.event.data['url']}?{self.event.data['name']}={probe}" + if "additional_params" in self.event.data.keys() and self.event.data["additional_params"] is not None: url = self.lightfuzz.helpers.add_get_params( url, self.event.data["additional_params"], encode=False @@ -216,9 +239,6 @@ async def standard_probe( json_data = None if event_type == "POSTPARAM": - - - method = "POST" data = {self.event.data["name"]: probe} if self.event.data["additional_params"] is not None: diff --git a/bbot/modules/lightfuzz_submodules/cmdi.py b/bbot/modules/lightfuzz_submodules/cmdi.py index 087ba6f66..2a8ba7749 100644 --- a/bbot/modules/lightfuzz_submodules/cmdi.py +++ b/bbot/modules/lightfuzz_submodules/cmdi.py @@ -26,10 +26,13 @@ async def fuzz(self): try: # add "echo" to the cmdi probe value to construct the command to be executed echo_probe = f"{probe_value}{p} echo {canary} {p}" + # we have to handle our own URL-encoding here, because our payloads include the & character if self.event.data["type"] == "GETPARAM": echo_probe = urllib.parse.quote(echo_probe.encode(), safe="") + # send cmdi probe and compare with baseline response - cmdi_probe = await self.compare_probe(http_compare, self.event.data["type"], echo_probe, cookies) + cmdi_probe = await self.compare_probe(http_compare, self.event.data["type"], echo_probe, cookies, skip_urlencoding=True) + # ensure we received an HTTP response if cmdi_probe[3]: # check if the canary is in the response and the word "echo" is NOT in the response text, ruling out mere reflection of the entire probe value without execution @@ -66,10 +69,10 @@ async def fuzz(self): } # payload is an nslookup command that includes the interactsh domain prepended the previously generated subdomain tag interactsh_probe = f"{p} nslookup {subdomain_tag}.{self.lightfuzz.interactsh_domain} {p}" - + # we have to handle our own URL-encoding here, because our payloads include the & character if self.event.data["type"] == "GETPARAM": interactsh_probe = urllib.parse.quote(interactsh_probe.encode(), safe="") # we send the probe here, and any positive detections are processed in the interactsh_callback defined in lightfuzz.py await self.standard_probe( - self.event.data["type"], cookies, f"{probe_value}{interactsh_probe}", timeout=15 + self.event.data["type"], cookies, f"{probe_value}{interactsh_probe}", timeout=15, skip_urlencoding=True ) diff --git a/bbot/modules/lightfuzz_submodules/path.py b/bbot/modules/lightfuzz_submodules/path.py index 45c2a147b..a9aac82cc 100644 --- a/bbot/modules/lightfuzz_submodules/path.py +++ b/bbot/modules/lightfuzz_submodules/path.py @@ -66,12 +66,12 @@ async def fuzz(self): confirmations = 0 while iterations > 0: try: - http_compare = self.compare_baseline(self.event.data["type"], probe_value, cookies) + http_compare = self.compare_baseline(self.event.data["type"], probe_value, cookies, skip_urlencoding=True) singledot_probe = await self.compare_probe( - http_compare, self.event.data["type"], payloads["singledot_payload"], cookies + http_compare, self.event.data["type"], payloads["singledot_payload"], cookies, 
diff --git a/bbot/modules/lightfuzz_submodules/path.py b/bbot/modules/lightfuzz_submodules/path.py
index 45c2a147b..a9aac82cc 100644
--- a/bbot/modules/lightfuzz_submodules/path.py
+++ b/bbot/modules/lightfuzz_submodules/path.py
@@ -66,12 +66,12 @@ async def fuzz(self):
         confirmations = 0
         while iterations > 0:
             try:
-                http_compare = self.compare_baseline(self.event.data["type"], probe_value, cookies)
+                http_compare = self.compare_baseline(self.event.data["type"], probe_value, cookies, skip_urlencoding=True)
                 singledot_probe = await self.compare_probe(
-                    http_compare, self.event.data["type"], payloads["singledot_payload"], cookies
+                    http_compare, self.event.data["type"], payloads["singledot_payload"], cookies, skip_urlencoding=True
                 )
                 doubledot_probe = await self.compare_probe(
-                    http_compare, self.event.data["type"], payloads["doubledot_payload"], cookies
+                    http_compare, self.event.data["type"], payloads["doubledot_payload"], cookies, skip_urlencoding=True
                 )
                 # if singledot_probe[0] is true, the response is the same as the baseline. This indicates adding a single dot did not break the functionality
                 # next, if doubledot_probe[0] is false, the response is different from the baseline. This further indicates that a real path is being manipulated
@@ -116,7 +116,7 @@ async def fuzz(self):
             }
 
             for path, trigger in absolute_paths.items():
-                r = await self.standard_probe(self.event.data["type"], cookies, path)
+                r = await self.standard_probe(self.event.data["type"], cookies, path, skip_urlencoding=True)
                 if r and trigger in r.text:
                     self.results.append(
                         {
diff --git a/bbot/modules/lightfuzz_submodules/ssti.py b/bbot/modules/lightfuzz_submodules/ssti.py
index 2cc34cbfb..77e032069 100644
--- a/bbot/modules/lightfuzz_submodules/ssti.py
+++ b/bbot/modules/lightfuzz_submodules/ssti.py
@@ -8,7 +8,8 @@ async def fuzz(self):
         # These are common SSTI payloads, each attempting to trigger an integer multiplication which would produce an expected value
         ssti_probes = ["<%25%3d%201337*1337%20%25>","<%= 1337*1337 %>", "${1337*1337}", "%24%7b1337*1337%7d", "1,787{{z}},569"]
         for probe_value in ssti_probes:
-            r = await self.standard_probe(self.event.data["type"], cookies, probe_value, allow_redirects=True)
+            r = await self.standard_probe(self.event.data["type"], cookies, probe_value, allow_redirects=True, skip_urlencoding=True)
+            # look for the expected value in the response
             if r and ("1787569" in r.text or "1,787,569" in r.text):
                 self.results.append(
diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py
index 142bf1cdc..ce2fbb7c9 100644
--- a/bbot/test/test_step_1/test_web.py
+++ b/bbot/test/test_step_1/test_web.py
@@ -494,7 +494,6 @@ def echo_cookies_handler(request):
     bbot_httpserver.expect_request(uri=endpoint).respond_with_handler(echo_cookies_handler)
     scan1 = bbot_scanner("127.0.0.1", config={"web": {"debug": True}})
     r1 = await scan1.helpers.request(url, cookies={"foo": "bar"})
-    print(r1.text)
     assert r1 is not None, "Request to self-signed SSL server went through even with ssl_verify=True"
     assert "bar" in r1.text
diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py
index 76975b69f..8bec62494 100644
--- a/bbot/test/test_step_2/module_tests/test_module_excavate.py
+++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py
@@ -457,6 +457,7 @@ class TestExcavateParameterExtraction(TestExcavate):
 
 
+            <input type="text" name="q4" value="trees and forests"/>
 
             Simple POST Form
 
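The widened character class in input_tag_regex is what lets this new q4 input survive extraction with its full value. A quick check against the updated pattern (the leading "<input[^>" fragment is assumed here, mirroring the structure of the other input-tag regexes; only the changed value class comes verbatim from the diff):

import re

input_tag_regex = re.compile(
    r"<input[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w\s]*)[\"\']?[^>]*?>"
)

html = '<input type="text" name="q4" value="trees and forests"/>'
match = input_tag_regex.search(html)
assert match is not None
assert match.group(1) == "q4"
# with \s added to the value class, the capture is no longer truncated at the first space
assert match.group(2) == "trees and forests"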
@@ -502,8 +503,10 @@ def check(self, module_test, events):
         found_htmltags_a = False
         found_htmltags_img = False
         found_select_noquotes = False
-
+        avoid_truncated_values = True
+        found_form_input_with_spaces = False
         for e in events:
+
             if e.type == "WEB_PARAMETER":
                 if e.data["description"] == "HTTP Extracted Parameter [jqueryget] (GET jquery Submodule)":
                     found_jquery_get = True
@@ -548,11 +551,19 @@ def check(self, module_test, events):
                     if "csrf" in e.data["additional_params"].keys():
                         found_select_noquotes = True
 
+                if e.data["description"] == "HTTP Extracted Parameter [q4] (GET Form Submodule)":
+                    if e.data["original_value"] == "trees and forests":
+                        found_form_input_with_spaces = True
+                    if e.data["original_value"] == "trees":
+                        avoid_truncated_values = False
+
         assert found_jquery_get, "Did not extract Jquery GET parameters"
         assert found_jquery_post, "Did not extract Jquery POST parameters"
         assert found_form_get, "Did not extract Form GET parameters"
         assert found_form_post, "Did not extract Form POST parameters"
         assert found_form_generic, "Did not extract Form (Generic) parameters"
+        assert found_form_input_with_spaces, "Did not extract Form input with spaces"
+        assert avoid_truncated_values, "Emitted a parameter with spaces without the entire value"
         assert found_jquery_get_original_value, "Did not extract Jquery GET parameter original_value"
         assert found_jquery_post_original_value, "Did not extract Jquery POST parameter original_value"
         assert found_form_get_original_value, "Did not extract Form GET parameter original_value"
@@ -779,6 +790,30 @@ def check(self, module_test, events):
         assert excavate_xml_extraction, "Excavate failed to extract xml parameter"
 
 
+class TestExcavateParameterExtraction_xml_invalid(TestExcavateParameterExtraction_xml):
+    getparam_extract_xml = """
+    <data>
+    <obscureParameter>1</obscureParameter>
+    <newlines>invalid\nwith\nnewlines</newlines>
+    </data>
+    """
+
+    async def setup_after_prep(self, module_test):
+        respond_args = {"response_data": self.getparam_extract_xml, "headers": {"Content-Type": "application/xml"}}
+        module_test.set_expect_requests(respond_args=respond_args)
+
+    def check(self, module_test, events):
+        excavate_xml_extraction = False
+        for e in events:
+            if e.type == "WEB_PARAMETER":
+                if (
+                    "HTTP Extracted Parameter (speculative from xml content) [newlines]"
+                    in e.data["description"]
+                    and "\n" not in e.data["original_value"]
+                ):
+                    excavate_xml_extraction = True
+        assert excavate_xml_extraction, "Excavate failed to extract xml parameter"
+
 
 class TestExcavateParameterExtraction_inputtagnovalue(ModuleTestBase):
     targets = ["http://127.0.0.1:8888/"]
diff --git a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py
index 665709e28..e81661a18 100644
--- a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py
+++ b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py
@@ -294,9 +294,10 @@ def request_handler(self, request):
             value = qs.split("search=")[1]
             if "&" in value:
                 value = value.split("&")[0]
+
             xss_block = f"""
-
-            0 search results for '{unquote(base64.b64decode(value))}'
-
+
+            0 search results for '{unquote(base64.b64decode(unquote(value)))}'
+
 
 
""" @@ -337,6 +338,7 @@ def request_handler(self, request): """ if "search=" in qs: + value = qs.split("search=")[1] if "&" in value: value = value.split("&")[0] @@ -364,7 +366,6 @@ def request_handler(self, request): """ return Response(xss_block, status=200) - return Response(parameter_block, status=200) @@ -545,13 +546,15 @@ def check(self, module_test, events): # In Javascript XSS Detection class Test_Lightfuzz_xss_injs(Test_Lightfuzz_xss): - def request_handler(self, request): - qs = str(request.query_string.decode()) - parameter_block = """ + + parameter_block = """ Link """ + + def request_handler(self, request): + qs = str(request.query_string.decode()) if "language=" in qs: value = qs.split("=")[1] @@ -572,7 +575,7 @@ def request_handler(self, request): """ return Response(xss_block, status=200) - return Response(parameter_block, status=200) + return Response(self.parameter_block, status=200) async def setup_after_prep(self, module_test): module_test.scan.modules["lightfuzz"].helpers.rand_string = lambda *args, **kwargs: "AAAAAAAAAAAAAA" @@ -601,6 +604,44 @@ def check(self, module_test, events): assert xss_finding_emitted, "In Javascript XSS FINDING not emitted" +# XSS Parameter Needing URL-Encoding +class Test_Lightfuzz_urlencoding(Test_Lightfuzz_xss_injs): + config_overrides = { + "interactsh_disable": True, + "modules": { + "lightfuzz": { + "enabled_submodules": ["cmdi","crypto","path","serial","sqli","ssti","xss"], + } + }, + } + + + parameter_block = """ + + Link + + """ + + def check(self, module_test, events): + web_parameter_emitted = False + original_value_captured = False + xss_finding_emitted = False + for e in events: + if e.type == "WEB_PARAMETER": + if "HTTP Extracted Parameter [language]" in e.data["description"]: + web_parameter_emitted = True + if e.data["original_value"] is not None and e.data["original_value"] == "parameter with spaces": + original_value_captured = True + + if e.type == "FINDING": + if "Possible Reflected XSS. Parameter: [language] Context: [In Javascript]" in e.data["description"]: + xss_finding_emitted = True + + assert web_parameter_emitted, "WEB_PARAMETER was not emitted" + assert original_value_captured, "original_value not captured" + assert xss_finding_emitted, "In Javascript XSS FINDING not emitted" + + # SQLI Single Quote/Two Single Quote (getparam) class Test_Lightfuzz_sqli(ModuleTestBase): targets = ["http://127.0.0.1:8888"] @@ -934,10 +975,9 @@ def request_handler(self, request):
 
             0 search results found
 
 
-        """
-            if "'%20AND%20(SLEEP(5))%20AND%20" in value:
+        """
+            if "' AND (SLEEP(5)) AND '" in unquote(value):
                 sleep(5)
-
             return Response(sql_block, status=200)
         return Response(parameter_block, status=200)
@@ -1245,7 +1285,7 @@ def request_handler(self, request):
                 return response
             else:
-                if cookies["session"] == "rO0ABXQABHRlc3Q=":
+                if unquote(cookies["session"]) == "rO0ABXQABHRlc3Q=":
                     return Response(java_serial_error_keyword, status=500)
                 else:
                     return Response(java_serial_error, status=500)
@@ -1298,12 +1338,10 @@ def request_handler(self, request):
         """
         if "search=" in qs:
             value = qs.split("=")[1]
-
            if "&" in value:
                 value = value.split("&")[0]
-
-            if "%26%26%20echo%20" in value:
-                cmdi_value = value.split("%26%26%20echo%20")[1].split("%20")[0]
+            if "&& echo " in unquote(value):
+                cmdi_value = unquote(value).split("&& echo ")[1].split(" ")[0]
             else:
                 cmdi_value = value
             cmdi_block = f"""
@@ -1612,9 +1650,6 @@ async def setup_after_prep(self, module_test):
         module_test.set_expect_requests_handler(expect_args=re.compile(".*"), request_handler=self.request_handler)
 
     def check(self, module_test, events):
-        for e in events:
-            print(f"{e.type}: {e.data}")
-
         web_parameter_extracted = False
         cryptographic_parameter_finding = False
         padding_oracle_detected = False