From 89994b980600497cc44cb399d6fa3f6137fcce58 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Thu, 23 Jan 2025 12:35:07 -0500 Subject: [PATCH 1/2] adding conditional url-encoding, fixing xml parameter extraction bug --- bbot/core/event/base.py | 1 + bbot/core/helpers/misc.py | 7 +- bbot/core/helpers/regexes.py | 7 +- bbot/modules/internal/excavate.py | 4 +- bbot/modules/lightfuzz_submodules/base.py | 39 ++++++++--- bbot/modules/lightfuzz_submodules/cmdi.py | 7 +- bbot/modules/lightfuzz_submodules/path.py | 8 +-- bbot/modules/lightfuzz_submodules/ssti.py | 2 +- bbot/test/test_step_1/test_web.py | 1 - .../module_tests/test_module_excavate.py | 37 +++++++++- .../module_tests/test_module_lightfuzz.py | 70 ++++++++++++++----- 11 files changed, 139 insertions(+), 44 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 988850ce70..0048f5778d 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1398,6 +1398,7 @@ def _outgoing_dedup_hash(self, event): def _url(self): return self.data["url"] + def __str__(self): max_event_len = 200 diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 688f9f599c..7c95492693 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -891,7 +891,7 @@ def extract_params_xml(xml_data, compare_mode="getparam"): xml_data (str): XML-formatted string containing elements. Returns: - set: A set of tuples containing the tags and their corresponding text values present in the XML object. + set: A set of tuples containing the tags and their corresponding sanitized text values present in the XML object. Raises: Returns an empty set if ParseError occurs. @@ -913,7 +913,10 @@ def extract_params_xml(xml_data, compare_mode="getparam"): while stack: current_element = stack.pop() if validate_parameter(current_element.tag, compare_mode): - tag_value_pairs.add((current_element.tag, current_element.text)) + # Sanitize the text value + text_value = current_element.text.strip() if current_element.text else None + sanitized_value = quote(text_value, safe='') if text_value else None + tag_value_pairs.add((current_element.tag, sanitized_value)) for child in current_element: stack.append(child) return tag_value_pairs diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py index 1a4f712b95..f740fc9b79 100644 --- a/bbot/core/helpers/regexes.py +++ b/bbot/core/helpers/regexes.py @@ -114,12 +114,11 @@ # For use with excavate parameters extractor input_tag_regex = re.compile( - r"]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w]*)[\"\']?[^>]*?>" + r"]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w\s]*)[\"\']?[^>]*?>" ) input_tag_regex2 = re.compile( - r"]*?\svalue=[\"\']?([:\-%\._=+\/\w]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>" + r"]*?\svalue=[\"\']?([:\-%\._=+\/\w\s]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>" ) -input_tag_novalue_regex = re.compile(r"]*\bvalue=)[^>]*?name=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>") input_tag_novalue_regex = re.compile(r"]*\b\svalue=)[^>]*?\sname=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>") # jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=") # jquery_get_regex = re.compile(r"\$.get\([\'\"].+[\'\"].+\{(.+)\}") @@ -164,7 +163,7 @@ button_tag_regex2 = re.compile( r"]*?value=[\"\']?([\-%\._=+\/\w]*)[\"\']?[^>]*?name=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>" ) -tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\s\'\"\>]+)[\"\']?[^>]*>") +tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\'\"\>]+)[\"\']?[^>]*>") valid_netloc = r"[^\s!@#$%^&()=/?\\'\";~`<>]+" diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 02fb2da796..5ca12d9d3a 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -1011,8 +1011,6 @@ async def setup(self): return True async def search(self, data, event, content_type, discovery_context="HTTP response"): - # TODO: replace this JSON/XML extraction with our lightfuzz envelope stuff - if not data: return None decoded_data = await self.helpers.re.recursive_decode(data) @@ -1023,7 +1021,7 @@ async def search(self, data, event, content_type, discovery_context="HTTP respon "json": self.helpers.extract_params_json, "xml": self.helpers.extract_params_xml, } - + for source_type, extract_func in extraction_map.items(): if source_type in content_type_lower: results = extract_func(data) diff --git a/bbot/modules/lightfuzz_submodules/base.py b/bbot/modules/lightfuzz_submodules/base.py index 13b9363799..e7262bf370 100644 --- a/bbot/modules/lightfuzz_submodules/base.py +++ b/bbot/modules/lightfuzz_submodules/base.py @@ -1,4 +1,5 @@ import copy +from urllib.parse import quote class BaseLightfuzz: @@ -18,10 +19,24 @@ def additional_params_process(self, additional_params, additional_params_populat new_additional_params[k] = v return new_additional_params + def conditional_urlencode(self, probe, event_type, skip_urlencoding=False): + """Conditionally url-encodes the probe if the event type requires it and encoding is not skipped by the submodule. + We also don't encode if any envelopes are present. + """ + if event_type in ["GETPARAM", "COOKIE"] and not skip_urlencoding and getattr(self.event, "envelopes", None): + # Exclude '&' from being encoded since we are operating on full query strings + return quote(probe, safe='&') + return probe + def compare_baseline( - self, event_type, probe, cookies, additional_params_populate_empty=False, speculative_mode="GETPARAM" + self, event_type, probe, cookies, additional_params_populate_empty=False, speculative_mode="GETPARAM", skip_urlencoding=False ): + + # Transparently pack the probe value into the envelopes, if present probe = self.outgoing_probe_value(probe) + + # URL Encode the probe if the event type is GETPARAM or COOKIE, if there are no envelopes, and the submodule did not opt-out with skip_urlencoding + probe = self.conditional_urlencode(probe, event_type, skip_urlencoding) http_compare = None if event_type == "SPECULATIVE": @@ -29,6 +44,7 @@ def compare_baseline( if event_type == "GETPARAM": baseline_url = f"{self.event.data['url']}?{self.event.data['name']}={probe}" + if "additional_params" in self.event.data.keys() and self.event.data["additional_params"] is not None: baseline_url = self.lightfuzz.helpers.add_get_params( baseline_url, self.event.data["additional_params"], encode=False @@ -94,8 +110,15 @@ async def compare_probe( additional_params_populate_empty=False, additional_params_override={}, speculative_mode="GETPARAM", + skip_urlencoding=False, ): + + # Transparently pack the probe value into the envelopes, if present probe = self.outgoing_probe_value(probe) + + # URL Encode the probe if the event type is GETPARAM or COOKIE, if there are no envelopes, and the submodule did not opt-out with skip_urlencoding + probe = self.conditional_urlencode(probe, event_type, skip_urlencoding) + additional_params = copy.deepcopy(self.event.data.get("additional_params", {})) if additional_params_override: for k, v in additional_params_override.items(): @@ -141,16 +164,23 @@ async def standard_probe( additional_params_populate_empty=False, speculative_mode="GETPARAM", allow_redirects=False, + skip_urlencoding=False, ): + # Transparently pack the probe value into the envelopes, if present probe = self.outgoing_probe_value(probe) + # URL Encode the probe if the event type is GETPARAM or COOKIE, if there are no envelopes, and the submodule did not opt-out with skip_urlencoding + probe = self.conditional_urlencode(probe, event_type, skip_urlencoding) + if event_type == "SPECULATIVE": event_type = speculative_mode method = "GET" + if event_type == "GETPARAM": url = f"{self.event.data['url']}?{self.event.data['name']}={probe}" + if "additional_params" in self.event.data.keys() and self.event.data["additional_params"] is not None: url = self.lightfuzz.helpers.add_get_params( url, self.event.data["additional_params"], encode=False @@ -169,9 +199,6 @@ async def standard_probe( json_data = None if event_type == "POSTPARAM": - - - method = "POST" data = {self.event.data["name"]: probe} if self.event.data["additional_params"] is not None: @@ -224,10 +251,6 @@ def incoming_probe_value(self, populate_empty=True): probe_value = self.lightfuzz.helpers.rand_string(10, numeric_only=True) else: probe_value = "" - # if not isinstance(probe_value, str): - # raise ValueError( - # f"incoming_probe_value should always be a string (got {type(probe_value)} / {probe_value})" - # ) probe_value = str(probe_value) return probe_value diff --git a/bbot/modules/lightfuzz_submodules/cmdi.py b/bbot/modules/lightfuzz_submodules/cmdi.py index 57acfdbb5b..a422a80d14 100644 --- a/bbot/modules/lightfuzz_submodules/cmdi.py +++ b/bbot/modules/lightfuzz_submodules/cmdi.py @@ -25,9 +25,10 @@ async def fuzz(self): for p in cmdi_probe_strings: try: echo_probe = f"{probe_value}{p} echo {canary} {p}" + # we have to handle our own URL-encoding here, because our payloads include the & character if self.event.data["type"] == "GETPARAM": echo_probe = urllib.parse.quote(echo_probe.encode(), safe="") - cmdi_probe = await self.compare_probe(http_compare, self.event.data["type"], echo_probe, cookies) + cmdi_probe = await self.compare_probe(http_compare, self.event.data["type"], echo_probe, cookies, skip_urlencoding=True) if cmdi_probe[3]: if canary in cmdi_probe[3].text and "echo" not in cmdi_probe[3].text: self.lightfuzz.debug(f"canary [{canary}] found in response when sending probe [{p}]") @@ -61,9 +62,9 @@ async def fuzz(self): "probe": p, } interactsh_probe = f"{p} nslookup {subdomain_tag}.{self.lightfuzz.interactsh_domain} {p}" - + # we have to handle our own URL-encoding here, because our payloads include the & character if self.event.data["type"] == "GETPARAM": interactsh_probe = urllib.parse.quote(interactsh_probe.encode(), safe="") await self.standard_probe( - self.event.data["type"], cookies, f"{probe_value}{interactsh_probe}", timeout=15 + self.event.data["type"], cookies, f"{probe_value}{interactsh_probe}", timeout=15, skip_urlencoding=True ) diff --git a/bbot/modules/lightfuzz_submodules/path.py b/bbot/modules/lightfuzz_submodules/path.py index 827af65f2f..722cf74312 100644 --- a/bbot/modules/lightfuzz_submodules/path.py +++ b/bbot/modules/lightfuzz_submodules/path.py @@ -65,12 +65,12 @@ async def fuzz(self): confirmations = 0 while iterations > 0: try: - http_compare = self.compare_baseline(self.event.data["type"], probe_value, cookies) + http_compare = self.compare_baseline(self.event.data["type"], probe_value, cookies, skip_urlencoding=True) singledot_probe = await self.compare_probe( - http_compare, self.event.data["type"], payloads["singledot_payload"], cookies + http_compare, self.event.data["type"], payloads["singledot_payload"], cookies, skip_urlencoding=True ) doubledot_probe = await self.compare_probe( - http_compare, self.event.data["type"], payloads["doubledot_payload"], cookies + http_compare, self.event.data["type"], payloads["doubledot_payload"], cookies, skip_urlencoding=True ) if ( @@ -110,7 +110,7 @@ async def fuzz(self): } for path, trigger in absolute_paths.items(): - r = await self.standard_probe(self.event.data["type"], cookies, path) + r = await self.standard_probe(self.event.data["type"], cookies, path, skip_urlencoding=True) if r and trigger in r.text: self.results.append( { diff --git a/bbot/modules/lightfuzz_submodules/ssti.py b/bbot/modules/lightfuzz_submodules/ssti.py index 766356b583..6de2a030a8 100644 --- a/bbot/modules/lightfuzz_submodules/ssti.py +++ b/bbot/modules/lightfuzz_submodules/ssti.py @@ -7,7 +7,7 @@ async def fuzz(self): cookies = self.event.data.get("assigned_cookies", {}) ssti_probes = ["<%25%3d%201337*1337%20%25>","<%= 1337*1337 %>", "${1337*1337}", "%24%7b1337*1337%7d", "1,787{{z}},569"] for probe_value in ssti_probes: - r = await self.standard_probe(self.event.data["type"], cookies, probe_value, allow_redirects=True) + r = await self.standard_probe(self.event.data["type"], cookies, probe_value, allow_redirects=True, skip_urlencoding=True) if r and ("1787569" in r.text or "1,787,569" in r.text): self.results.append( { diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py index 142bf1cdc4..ce2fbb7c91 100644 --- a/bbot/test/test_step_1/test_web.py +++ b/bbot/test/test_step_1/test_web.py @@ -494,7 +494,6 @@ def echo_cookies_handler(request): bbot_httpserver.expect_request(uri=endpoint).respond_with_handler(echo_cookies_handler) scan1 = bbot_scanner("127.0.0.1", config={"web": {"debug": True}}) r1 = await scan1.helpers.request(url, cookies={"foo": "bar"}) - print(r1.text) assert r1 is not None, "Request to self-signed SSL server went through even with ssl_verify=True" assert "bar" in r1.text diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py index 76975b69f2..8bec62494f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_excavate.py +++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py @@ -457,6 +457,7 @@ class TestExcavateParameterExtraction(TestExcavate):


+

Simple POST Form

@@ -502,8 +503,10 @@ def check(self, module_test, events): found_htmltags_a = False found_htmltags_img = False found_select_noquotes = False - + avoid_truncated_values = True + found_form_input_with_spaces = False for e in events: + if e.type == "WEB_PARAMETER": if e.data["description"] == "HTTP Extracted Parameter [jqueryget] (GET jquery Submodule)": found_jquery_get = True @@ -548,11 +551,19 @@ def check(self, module_test, events): if "csrf" in e.data["additional_params"].keys(): found_select_noquotes = True + if e.data["description"] == "HTTP Extracted Parameter [q4] (GET Form Submodule)": + if e.data["original_value"] == "trees and forests": + found_form_input_with_spaces = True + if e.data["original_value"] == "trees": + avoid_truncated_values = False + assert found_jquery_get, "Did not extract Jquery GET parameters" assert found_jquery_post, "Did not extract Jquery POST parameters" assert found_form_get, "Did not extract Form GET parameters" assert found_form_post, "Did not extract Form POST parameters" assert found_form_generic, "Did not extract Form (Generic) parameters" + assert found_form_input_with_spaces, "Did not extract Form input with spaces" + assert avoid_truncated_values, "Emitted a parameter with spaces without the entire value" assert found_jquery_get_original_value, "Did not extract Jquery GET parameter original_value" assert found_jquery_post_original_value, "Did not extract Jquery POST parameter original_value" assert found_form_get_original_value, "Did not extract Form GET parameter original_value" @@ -779,6 +790,30 @@ def check(self, module_test, events): assert excavate_xml_extraction, "Excavate failed to extract xml parameter" +class TestExcavateParameterExtraction_xml_invalid(TestExcavateParameterExtraction_xml): + getparam_extract_xml = """ + + 1 + invalid\nwith\nnewlines + + """ + + async def setup_after_prep(self, module_test): + respond_args = {"response_data": self.getparam_extract_xml, "headers": {"Content-Type": "application/xml"}} + module_test.set_expect_requests(respond_args=respond_args) + + def check(self, module_test, events): + excavate_xml_extraction = False + for e in events: + if e.type == "WEB_PARAMETER": + if ( + "HTTP Extracted Parameter (speculative from xml content) [newlines]" + in e.data["description"] + and "\n" not in e.data["original_value"] + ): + excavate_xml_extraction = True + assert excavate_xml_extraction, "Excavate failed to extract xml parameter" + class TestExcavateParameterExtraction_inputtagnovalue(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] diff --git a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py index 99c3b1193a..a01ac0c298 100644 --- a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +++ b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py @@ -294,9 +294,10 @@ def request_handler(self, request): value = qs.split("search=")[1] if "&" in value: value = value.split("&")[0] + xss_block = f"""
-

0 search results for '{unquote(base64.b64decode(value))}'

+

0 search results for '{unquote(base64.b64decode(unquote(value)))}'


""" @@ -337,6 +338,7 @@ def request_handler(self, request): """ if "search=" in qs: + value = qs.split("search=")[1] if "&" in value: value = value.split("&")[0] @@ -364,7 +366,6 @@ def request_handler(self, request): """ return Response(xss_block, status=200) - return Response(parameter_block, status=200) @@ -545,13 +546,15 @@ def check(self, module_test, events): # In Javascript XSS Detection class Test_Lightfuzz_xss_injs(Test_Lightfuzz_xss): - def request_handler(self, request): - qs = str(request.query_string.decode()) - parameter_block = """ + + parameter_block = """ Link """ + + def request_handler(self, request): + qs = str(request.query_string.decode()) if "language=" in qs: value = qs.split("=")[1] @@ -572,7 +575,7 @@ def request_handler(self, request): """ return Response(xss_block, status=200) - return Response(parameter_block, status=200) + return Response(self.parameter_block, status=200) async def setup_after_prep(self, module_test): module_test.scan.modules["lightfuzz"].helpers.rand_string = lambda *args, **kwargs: "AAAAAAAAAAAAAA" @@ -601,6 +604,45 @@ def check(self, module_test, events): assert xss_finding_emitted, "In Javascript XSS FINDING not emitted" +# XSS Parameter Needing URL-Encoding +class Test_Lightfuzz_urlencoding(Test_Lightfuzz_xss_injs): + config_overrides = { + "interactsh_disable": True, + "modules": { + "lightfuzz": { + "enabled_submodules": ["cmdi","crypto","path","serial","sqli","ssti","xss"], + } + }, + } + + + parameter_block = """ + + Link + + """ + + def check(self, module_test, events): + web_parameter_emitted = False + original_value_captured = False + xss_finding_emitted = False + for e in events: + if e.type == "WEB_PARAMETER": + if e.data["original_value"] is not None: + if "HTTP Extracted Parameter [language]" in e.data["description"]: + web_parameter_emitted = True + if e.data["original_value"] == "parameter with spaces": + original_value_captured = True + + if e.type == "FINDING": + if "Possible Reflected XSS. Parameter: [language] Context: [In Javascript]" in e.data["description"]: + xss_finding_emitted = True + + assert web_parameter_emitted, "WEB_PARAMETER was not emitted" + assert original_value_captured, "original_value not captured" + assert xss_finding_emitted, "In Javascript XSS FINDING not emitted" + + # SQLI Single Quote/Two Single Quote (getparam) class Test_Lightfuzz_sqli(ModuleTestBase): targets = ["http://127.0.0.1:8888"] @@ -934,10 +976,9 @@ def request_handler(self, request):

0 search results found


- """ - if "'%20AND%20(SLEEP(5))%20AND%20" in value: + """ + if "' AND (SLEEP(5)) AND '" in unquote(value): sleep(5) - return Response(sql_block, status=200) return Response(parameter_block, status=200) @@ -1108,7 +1149,7 @@ def request_handler(self, request): return response else: - if cookies["session"] == "rO0ABXQABHRlc3Q=": + if unquote(cookies["session"]) == "rO0ABXQABHRlc3Q=": return Response(java_serial_error_keyword, status=500) else: return Response(java_serial_error, status=500) @@ -1161,12 +1202,10 @@ def request_handler(self, request): """ if "search=" in qs: value = qs.split("=")[1] - if "&" in value: value = value.split("&")[0] - - if "%26%26%20echo%20" in value: - cmdi_value = value.split("%26%26%20echo%20")[1].split("%20")[0] + if "&& echo " in unquote(value): + cmdi_value = unquote(value).split("&& echo ")[1].split(" ")[0] else: cmdi_value = value cmdi_block = f""" @@ -1475,9 +1514,6 @@ async def setup_after_prep(self, module_test): module_test.set_expect_requests_handler(expect_args=re.compile(".*"), request_handler=self.request_handler) def check(self, module_test, events): - for e in events: - print(f"{e.type}: {e.data}") - web_parameter_extracted = False cryptographic_parameter_finding = False padding_oracle_detected = False From 8919bc9103d5c35ecf7f227f934c40c6a966e367 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Thu, 23 Jan 2025 12:49:38 -0500 Subject: [PATCH 2/2] fix test --- bbot/test/test_step_2/module_tests/test_module_lightfuzz.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py index a01ac0c298..63f5f71433 100644 --- a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +++ b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py @@ -628,10 +628,9 @@ def check(self, module_test, events): xss_finding_emitted = False for e in events: if e.type == "WEB_PARAMETER": - if e.data["original_value"] is not None: if "HTTP Extracted Parameter [language]" in e.data["description"]: web_parameter_emitted = True - if e.data["original_value"] == "parameter with spaces": + if e.data["original_value"] is not None and e.data["original_value"] == "parameter with spaces": original_value_captured = True if e.type == "FINDING":