From 7fd6d6b4df47c6b2fd79093974e81b03859dcf4e Mon Sep 17 00:00:00 2001 From: ReversingLabs <55623149+rl-devops@users.noreply.github.com> Date: Fri, 27 Jan 2023 12:24:40 +0100 Subject: [PATCH] add a simple pattern analizer for future use --- DONE | 2 ++ analize_patterns.py | 58 +++++++++++++++++++++++++++++++++++++++++++ compare_known_tld.py | 59 ++++++++++++++++++++++---------------------- 3 files changed, 90 insertions(+), 29 deletions(-) create mode 100755 analize_patterns.py diff --git a/DONE b/DONE index 18ffddc..cee52e6 100644 --- a/DONE +++ b/DONE @@ -43,3 +43,5 @@ DONE - convert the list of tld to Dict - allow override or change and adding new domains without needing a new version directly - tested with existing testdomains, all reponses will now respond with the true tld not the one with a underscore + + - add simple autodetect based on tld from IANA, try to use the .com patterns to see if we get something useful diff --git a/analize_patterns.py b/analize_patterns.py new file mode 100755 index 0000000..111d686 --- /dev/null +++ b/analize_patterns.py @@ -0,0 +1,58 @@ +#! 
/usr/bin/env python3 + +import sys +import re +from typing import ( + # Optional, + # List, + Dict, +) + +# most likely we can now introduce trailing whitespace trim on all lines from whois, +# and simplify trailing whitespace rules +# as \r is already gone now and that was the most distinct line ending +# occasionally we need to detect \n\s+ for groups that belong together +# mostly with indented blocks of nameservers + +# import whois +from whois.tld_regexpr import ZZ + + +def buildRegCollection(zz: Dict): + regCollection = {} + # get all regexes + for name in zz: + # print(name) + z = zz[name] + for key in z: + if key is None: + continue + + if key.startswith("_"): + continue + + if key in ["extend"]: + continue + + if key not in regCollection: + regCollection[key] = {} + + reg = z[key] + if reg is None: + continue + + regCollection[key][reg] = None + if isinstance(reg, str): + regCollection[key][reg] = re.compile(reg, flags=re.IGNORECASE) + + return regCollection + + +if __name__ == "__main__": + regCollection = buildRegCollection(ZZ) + + for name in sorted(regCollection.keys()): + print(f"## {name}", file=sys.stderr) + for key in sorted(regCollection[name].keys()): + if key: + print(f"{name}: {key}") diff --git a/compare_known_tld.py b/compare_known_tld.py index ca6a88c..e867bd4 100755 --- a/compare_known_tld.py +++ b/compare_known_tld.py @@ -67,34 +67,35 @@ found = {} for tld in dataList2: data, status = i.getInfoOnOneTld(tld) - # print(status, data) - - if data and "whois" in data and data["whois"] and data["whois"] != "NULL": - wh = data["whois"] - if wh.endswith(f".{tld}"): - dd = wh.split(".")[-2:] - else: - dd = ["meta", tld] - - zz = _do_whois_query( - dd, - ignore_returncode=False, - server=wh, - ) - - pp = {"_server": wh, "extend": "com"} - aDictToTestOverride = {tld: pp} - - whois.mergeExternalDictWithRegex(aDictToTestOverride) - try: - d = whois.query(".".join(dd)) - if d: - print(d.__dict__) - if len(d.name_servers) > 0: - found[tld] = pp - 
print(f"## ZZ['{tld}'] = {found[tld]} # auto-detected via IANA tld") - except Exception as e: - print(e) + xtest = data and ("whois" in data) and (data["whois"]) and (data["whois"] != "NULL") + if not xtest: + print(f"no whois info for tld: {tld} {data}") + continue + + wh = data["whois"] + if wh.endswith(f".{tld}"): + dd = wh.split(".")[-2:] else: - print(f"no whois info for tld: {tld}\n", data) + dd = ["meta", tld] + + print(f"try: {tld}") + zz = _do_whois_query( + dd, + ignore_returncode=False, + server=wh, + ) + + pp = {"_server": wh, "extend": "com"} + aDictToTestOverride = {tld: pp} + + whois.mergeExternalDictWithRegex(aDictToTestOverride) + try: + d = whois.query(".".join(dd)) + if d: + print(d.__dict__) + if len(d.name_servers) > 0: + found[tld] = pp + print(f"## ZZ['{tld}'] = {found[tld]} # auto-detected via IANA tld") + except Exception as e: + print(e)