From 059bfa8ea56db5cf45f9d0dc6c63688d0528a44d Mon Sep 17 00:00:00 2001 From: dcallies Date: Fri, 27 Sep 2024 17:02:52 -0400 Subject: [PATCH 1/5] [pytx] Write interfaces --- .../src/OpenMediaMatch/tests/test_api.py | 2 +- .../exchanges/signal_exchange_api.py | 33 ----- .../exchanges/write_api_mixins.py | 121 ++++++++++++++++++ 3 files changed, 122 insertions(+), 34 deletions(-) create mode 100644 python-threatexchange/threatexchange/exchanges/write_api_mixins.py diff --git a/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py b/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py index e7e96d9c7..f1c6ca4fd 100644 --- a/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py +++ b/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py @@ -214,7 +214,7 @@ def test_exchange_api_set_auth(app: Flask, client: FlaskClient): tx_name = FBThreatExchangeSignalExchangeAPI.get_name() # Monkeypatch installed types storage.exchange_types = { # type:ignore - api_cls.get_name(): api_cls # type:ignore + api_cls.get_name(): api_cls for api_cls in ( FBThreatExchangeSignalExchangeAPI, StaticSampleSignalExchangeAPI, diff --git a/python-threatexchange/threatexchange/exchanges/signal_exchange_api.py b/python-threatexchange/threatexchange/exchanges/signal_exchange_api.py index 6eefcc2e9..c043df189 100644 --- a/python-threatexchange/threatexchange/exchanges/signal_exchange_api.py +++ b/python-threatexchange/threatexchange/exchanges/signal_exchange_api.py @@ -253,39 +253,6 @@ def fetch_iter( """ raise NotImplementedError - # TODO - Restore in a future version - # def report_seen( - # self, - # collab: TCollabConfig, - # s_type: SignalType, - # signal: str, - # metadata: state.TFetchedSignalMetadata, - # ) -> None: - # """ - # Report that you observed this signal. - - # This is an optional API, and places that use it should catch - # the NotImplementError. - # """ - # raise NotImplementedError - - def report_opinion( - self, - s_type: t.Type[SignalType], - signal: str, - opinion: state.SignalOpinion, - ) -> None: - """ - Weigh in on a signal for this collaboration. - - Most implementations will want a full replacement specialization, but this - allows a common interface for all uploads for the simplest usecases. - - This is an optional API, and places that use it should catch - the NotImplementError. - """ - raise NotImplementedError - # Convenience for avoiding mypy strict errors AnySignalExchangeAPI = SignalExchangeAPI[t.Any, t.Any, t.Any, t.Any, t.Any] diff --git a/python-threatexchange/threatexchange/exchanges/write_api_mixins.py b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py new file mode 100644 index 000000000..06ad31a3d --- /dev/null +++ b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py @@ -0,0 +1,121 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + +""" +These contain small mixins that enable write features on SignalExchangeAPI + +While it would be possible to make all these extensions of SignalExchangeAPI, +we're just about at the limit of understandable complexity for the typing on +SignalExchangeAPI, and so moving these into mixins might allow us to simplify +the logic a bit. + +Example + class MyApiWithFeedback( + SignalExchangeAPI[ + MyCollabConfig, + MyCheckpoint, + MySignalMetadata, + str, + MyUpdateRecordValue, + ], + ExchangeWithSeen[MyCollabConfig], + ExchangeWithFeedback[MyCollabConfig], + ExchangeWithUpload[MyCollabConfig], + ): + pass + + # Inside py-tx reflection + if isinstance(exchange, ExchangeWithSeen): + exchange.submit_matched(collab, metadata) + + +@see SignalExchangeAPI +""" + +import typing as t + +from threatexchange.exchanges import fetch_state +from threatexchange.signal_type.signal_base import SignalType +from threatexchange.exchanges.collab_config import CollaborationConfigBase + + +TCollabConfig = t.TypeVar("TCollabConfig", bound=CollaborationConfigBase) + + +class ExchangeWithMatchedOnPlatform(t.Generic[TCollabConfig,]): + """ + Mixin for an exchange that supports recording + """ + + def submit_matched( + self, + # The collaboration we should share the event to + collab: TCollabConfig, + matched_record: fetch_state.TFetchedSignalMetadata, + ) -> None: + """ + Report that you matched this signal to content on your platform. + + This doesn't have any indication of whether this content is harmful, + but this can help certify that your matching implementation is + functional, and help other platforms track platform-to-platform + spread. + """ + raise NotImplementedError + + +class ExchangeWithReviewFeedback( + t.Generic[TCollabConfig, fetch_state.TFetchedSignalMetadata] +): + """ + Mixin for exchanges that supports recording the results of a manual review + """ + + def submit_review_feedback( + self, + # The collaboration we should share the event to + collab: TCollabConfig, + # asdf + reviewed_record: fetch_state.TFetchedSignalMetadata, + # Whether the review of matched content corresponded to material the + # exchange aims to find. Usually this corresponds to harmful content + review_result: t.Literal[ + fetch_state.SignalOpinionCategory.POSITIVE_CLASS, + fetch_state.SignalOpinionCategory.NEGATIVE_CLASS, + ], + # Someday, we might also support tags, since StopNCII supports it + # tags: t.Optional[t.Set[str]] = None + ) -> None: + """ + Report that you matched this signal to content on your platform. + + This doesn't have any indication of whether this content is harmful, + but this can help certify that your matching implementation is + functional, and help other platforms track platform-to-platform + spread. + """ + raise NotImplementedError + + +class ExchangeWithUpload(t.Generic[TCollabConfig]): + """ + Mixin for exchanges that supports uploading new opinions to the exchange + """ + + def submit_opinion( + self, + # The collaboration we should upload the opinion to + collab: TCollabConfig, + # The SignalType we are uploading + s_type: t.Type[SignalType], + # The signal value we are uploading + signal: str, + # The opinion we are sharing + opinion: fetch_state.SignalOpinion, + ) -> None: + """ + Weigh in on a signal for this collaboration. + + Most implementations will want a full replacement specialization, but this + allows a common interface for all uploads for the simplest usecases. + """ + raise NotImplementedError From 2bafa1e20f3d659a495bc8179ecd93b619a7e457 Mon Sep 17 00:00:00 2001 From: dcallies Date: Fri, 27 Sep 2024 21:08:57 +0000 Subject: [PATCH 2/5] fix? --- hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py b/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py index f1c6ca4fd..e7e96d9c7 100644 --- a/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py +++ b/hasher-matcher-actioner/src/OpenMediaMatch/tests/test_api.py @@ -214,7 +214,7 @@ def test_exchange_api_set_auth(app: Flask, client: FlaskClient): tx_name = FBThreatExchangeSignalExchangeAPI.get_name() # Monkeypatch installed types storage.exchange_types = { # type:ignore - api_cls.get_name(): api_cls + api_cls.get_name(): api_cls # type:ignore for api_cls in ( FBThreatExchangeSignalExchangeAPI, StaticSampleSignalExchangeAPI, From d8504d2f1a00b62bfe5414f695ad64b8e3d3ca5e Mon Sep 17 00:00:00 2001 From: dcallies Date: Sun, 29 Sep 2024 16:33:41 +0100 Subject: [PATCH 3/5] type fixes and tweaks --- .../threatexchange/cli/helpers.py | 18 +++++++++--------- .../threatexchange/cli/label_cmd.py | 15 +++++++++------ .../exchanges/write_api_mixins.py | 17 +++++++++-------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/python-threatexchange/threatexchange/cli/helpers.py b/python-threatexchange/threatexchange/cli/helpers.py index 6b0f7aeeb..380183399 100644 --- a/python-threatexchange/threatexchange/cli/helpers.py +++ b/python-threatexchange/threatexchange/cli/helpers.py @@ -38,7 +38,7 @@ class FlexFilesInputAction(argparse.Action): All producing the same result. """ - def __call__(self, _parser, namespace, values, _option_string=None): + def __call__(self, _parser, namespace, values, _option_string=None) -> None: args: t.List[str] = list(values) if not args: raise argparse.ArgumentError(self, "this argument is required") @@ -48,10 +48,10 @@ def __call__(self, _parser, namespace, values, _option_string=None): ret = [] for i, filename in enumerate(args): if filename.strip() == "-": - with tempfile.NamedTemporaryFile("wb", delete=False) as tmp: - logging.debug("Writing stdin to %s", tmp.name) - shutil.copyfileobj(sys.stdin.buffer, tmp) - filename = tmp.name + with tempfile.NamedTemporaryFile("wb", delete=False) as bytes_tmp: + logging.debug("Writing stdin to %s", bytes_tmp.name) + shutil.copyfileobj(sys.stdin.buffer, bytes_tmp) # type: ignore + filename = bytes_tmp.name elif filename.strip() == "--": # We could also just open this as a series of streams and seek() them with tempfile.NamedTemporaryFile("w", delete=False) as tmp: @@ -63,10 +63,10 @@ def __call__(self, _parser, namespace, values, _option_string=None): elif filename.startswith(("http://", "https://")): resp = requests.get(filename, allow_redirects=True) resp.raise_for_status() - with tempfile.NamedTemporaryFile("wb", delete=False) as tmp: - logging.debug("Writing -- to %s", tmp.name) - tmp.write(resp.content) - filename = pathlib.Path(tmp.name) + with tempfile.NamedTemporaryFile("wb", delete=False) as bytes_tmp: + logging.debug("Writing -- to %s", bytes_tmp.name) + bytes_tmp.write(resp.content) + filename = bytes_tmp.name path = pathlib.Path(filename) if not path.is_file(): raise argparse.ArgumentError(self, f"no such file {path}") diff --git a/python-threatexchange/threatexchange/cli/label_cmd.py b/python-threatexchange/threatexchange/cli/label_cmd.py index 7efb1301e..b8db3758d 100644 --- a/python-threatexchange/threatexchange/cli/label_cmd.py +++ b/python-threatexchange/threatexchange/cli/label_cmd.py @@ -4,16 +4,16 @@ import argparse import pathlib import typing as t -from threatexchange.cli.helpers import FlexFilesInputAction -from threatexchange.exchanges.fetch_state import SignalOpinion, SignalOpinionCategory - - -from threatexchange.signal_type.signal_base import MatchesStr, SignalType, TextHasher from threatexchange import common from threatexchange.cli.cli_config import CLISettings +from threatexchange.cli.exceptions import CommandError +from threatexchange.cli.helpers import FlexFilesInputAction +from threatexchange.exchanges.fetch_state import SignalOpinion, SignalOpinionCategory from threatexchange.content_type.content_base import ContentType +from threatexchange.exchanges import write_api_mixins from threatexchange.exchanges.collab_config import CollaborationConfigBase +from threatexchange.signal_type.signal_base import SignalType from threatexchange.cli import command_base @@ -129,12 +129,15 @@ def execute(self, settings: CLISettings) -> None: # signal_types = self.only_signals or settings.get_signal_types_for_content( # self.content_type # ) + if not isinstance(api, write_api_mixins.ExchangeWithUpload): + raise CommandError.user(f"api {api.get_name()} doesn't suppport uploading") if self.as_hash is not None: for f in self.files: signal_type = self.as_hash hash_val = signal_type.validate_signal_str(f.read_text()) - api.report_opinion( + api.submit_opinion( + self.collab, signal_type, hash_val, SignalOpinion( diff --git a/python-threatexchange/threatexchange/exchanges/write_api_mixins.py b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py index 06ad31a3d..6e2d59e86 100644 --- a/python-threatexchange/threatexchange/exchanges/write_api_mixins.py +++ b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py @@ -17,8 +17,8 @@ class MyApiWithFeedback( str, MyUpdateRecordValue, ], - ExchangeWithSeen[MyCollabConfig], - ExchangeWithFeedback[MyCollabConfig], + ExchangeWithSeen[MyCollabConfig, MyUpdateRecordValue], + ExchangeWithFeedback[MyCollabConfig, MyUpdateRecordValue], ExchangeWithUpload[MyCollabConfig], ): pass @@ -39,9 +39,12 @@ class MyApiWithFeedback( TCollabConfig = t.TypeVar("TCollabConfig", bound=CollaborationConfigBase) +TFetchedSignalMetadata = t.TypeVar( + "TFetchedSignalMetadata", bound=fetch_state.FetchedSignalMetadata +) -class ExchangeWithMatchedOnPlatform(t.Generic[TCollabConfig,]): +class ExchangeWithMatchedOnPlatform(t.Generic[TCollabConfig, TFetchedSignalMetadata]): """ Mixin for an exchange that supports recording """ @@ -50,7 +53,7 @@ def submit_matched( self, # The collaboration we should share the event to collab: TCollabConfig, - matched_record: fetch_state.TFetchedSignalMetadata, + matched_record: TFetchedSignalMetadata, ) -> None: """ Report that you matched this signal to content on your platform. @@ -63,9 +66,7 @@ def submit_matched( raise NotImplementedError -class ExchangeWithReviewFeedback( - t.Generic[TCollabConfig, fetch_state.TFetchedSignalMetadata] -): +class ExchangeWithReviewFeedback(t.Generic[TCollabConfig, TFetchedSignalMetadata]): """ Mixin for exchanges that supports recording the results of a manual review """ @@ -75,7 +76,7 @@ def submit_review_feedback( # The collaboration we should share the event to collab: TCollabConfig, # asdf - reviewed_record: fetch_state.TFetchedSignalMetadata, + reviewed_record: TFetchedSignalMetadata, # Whether the review of matched content corresponded to material the # exchange aims to find. Usually this corresponds to harmful content review_result: t.Literal[ From 36276fb5ec9339482d87e77ef58b270d7a7135e8 Mon Sep 17 00:00:00 2001 From: dcallies Date: Sun, 29 Sep 2024 16:43:12 +0100 Subject: [PATCH 4/5] comments --- .../threatexchange/exchanges/write_api_mixins.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python-threatexchange/threatexchange/exchanges/write_api_mixins.py b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py index 6e2d59e86..04a2d8de4 100644 --- a/python-threatexchange/threatexchange/exchanges/write_api_mixins.py +++ b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py @@ -53,6 +53,7 @@ def submit_matched( self, # The collaboration we should share the event to collab: TCollabConfig, + # The API record we matched matched_record: TFetchedSignalMetadata, ) -> None: """ @@ -75,7 +76,7 @@ def submit_review_feedback( self, # The collaboration we should share the event to collab: TCollabConfig, - # asdf + # The API record that we're sharing feedback on reviewed_record: TFetchedSignalMetadata, # Whether the review of matched content corresponded to material the # exchange aims to find. Usually this corresponds to harmful content From 98a843cc3fd24e4109232b1b5aa1ab79586a5607 Mon Sep 17 00:00:00 2001 From: dcallies Date: Wed, 2 Oct 2024 11:06:05 +0100 Subject: [PATCH 5/5] more twiddling --- .../exchanges/write_api_mixins.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/write_api_mixins.py b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py index 04a2d8de4..0362fbc23 100644 --- a/python-threatexchange/threatexchange/exchanges/write_api_mixins.py +++ b/python-threatexchange/threatexchange/exchanges/write_api_mixins.py @@ -44,17 +44,24 @@ class MyApiWithFeedback( ) -class ExchangeWithMatchedOnPlatform(t.Generic[TCollabConfig, TFetchedSignalMetadata]): +class ExchangeWithMatchedOnPlatform( + t.Generic[TCollabConfig, fetch_state.TUpdateRecordKey] +): """ - Mixin for an exchange that supports recording + Mixin for an exchange that supports recording you have successfully made a match + + This can be useful for automatically cerifying that your matching pipeline is + functioning, or to allow insights on platform spread. """ def submit_matched( self, # The collaboration we should share the event to collab: TCollabConfig, - # The API record we matched - matched_record: TFetchedSignalMetadata, + # The API record that we matched, corresponding to the key + # from the original API. This seems to be unifying across all existing APIs + # though some can reconstruct the feedback target from the signal or the metadata + matched_record_key: fetch_state.TUpdateRecordKey, ) -> None: """ Report that you matched this signal to content on your platform. @@ -67,17 +74,24 @@ def submit_matched( raise NotImplementedError -class ExchangeWithReviewFeedback(t.Generic[TCollabConfig, TFetchedSignalMetadata]): +class ExchangeWithReviewFeedback( + t.Generic[TCollabConfig, fetch_state.TUpdateRecordKey] +): """ - Mixin for exchanges that supports recording the results of a manual review + Mixin for exchanges that supports recording the results of a manual review. + + This can help other platforms stack rank potential matches for review to + prioritize the ones that seem to work well accross multiple platforms. """ def submit_review_feedback( self, # The collaboration we should share the event to collab: TCollabConfig, - # The API record that we're sharing feedback on - reviewed_record: TFetchedSignalMetadata, + # The API record that we're sharing feedback on, corresponding to the key + # from the original API. This seems to be unifying across all existing APIs + # though some can reconstruct the feedback target from the signal or the metadata + matched_record_key: fetch_state.TUpdateRecordKey, # Whether the review of matched content corresponded to material the # exchange aims to find. Usually this corresponds to harmful content review_result: t.Literal[