diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 988850ce7..0048f5778 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -1398,6 +1398,7 @@ def _outgoing_dedup_hash(self, event):
def _url(self):
return self.data["url"]
+
def __str__(self):
max_event_len = 200
diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py
index 688f9f599..7c9549269 100644
--- a/bbot/core/helpers/misc.py
+++ b/bbot/core/helpers/misc.py
@@ -891,7 +891,7 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
xml_data (str): XML-formatted string containing elements.
Returns:
- set: A set of tuples containing the tags and their corresponding text values present in the XML object.
+ set: A set of tuples containing the tags and their corresponding sanitized text values present in the XML object.
Raises:
Returns an empty set if ParseError occurs.
@@ -913,7 +913,10 @@ def extract_params_xml(xml_data, compare_mode="getparam"):
while stack:
current_element = stack.pop()
if validate_parameter(current_element.tag, compare_mode):
- tag_value_pairs.add((current_element.tag, current_element.text))
+ # Sanitize the text value
+ text_value = current_element.text.strip() if current_element.text else None
+ sanitized_value = quote(text_value, safe='') if text_value else None
+ tag_value_pairs.add((current_element.tag, sanitized_value))
for child in current_element:
stack.append(child)
return tag_value_pairs
diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py
index e007bff03..809b2d6e3 100644
--- a/bbot/core/helpers/regexes.py
+++ b/bbot/core/helpers/regexes.py
@@ -114,10 +114,10 @@
# For use with excavate parameters extractor
input_tag_regex = re.compile(
- r"<input[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w]*)[\"\']?[^>]*?>"
+ r"<input[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w\s]*)[\"\']?[^>]*?>"
)
input_tag_regex2 = re.compile(
- r"<input[^>]*?\svalue=[\"\']?([:\-%\._=+\/\w]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
+ r"<input[^>]*?\svalue=[\"\']?([:\-%\._=+\/\w\s]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
)
input_tag_novalue_regex = re.compile(r"<input(?![^>]*\b\svalue=)[^>]*?\sname=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>")
# jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=")
@@ -169,7 +169,7 @@
button_tag_regex2 = re.compile(
r"