From ab5dd40114468899cf946b73729735c4903d26e1 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 22 Jan 2025 12:18:54 -0600 Subject: [PATCH] Add flag backend to tagstore --- .../api/endpoints/project_tagkey_values.py | 10 +- src/sentry/api/endpoints/project_tags.py | 12 ++- src/sentry/conf/server.py | 4 + src/sentry/flags/docs/api.md | 100 ------------------ src/sentry/tagstore/__init__.py | 6 ++ src/sentry/tagstore/snuba/__init__.py | 2 +- src/sentry/tagstore/snuba/backend.py | 69 +++++++++--- .../endpoints/test_project_tagkey_values.py | 36 +++++++ .../snuba/api/endpoints/test_project_tags.py | 41 +++++++ 9 files changed, 160 insertions(+), 120 deletions(-) diff --git a/src/sentry/api/endpoints/project_tagkey_values.py b/src/sentry/api/endpoints/project_tagkey_values.py index 83bed72538978c..7d6218a5831ab8 100644 --- a/src/sentry/api/endpoints/project_tagkey_values.py +++ b/src/sentry/api/endpoints/project_tagkey_values.py @@ -42,8 +42,14 @@ def get(self, request: Request, project, key) -> Response: # if the environment doesn't exist then the tag can't possibly exist raise ResourceDoesNotExist + # Flags also autocomplete. We can switch the dataset we target. + if request.GET.get("useFlagsBackend") == "1": + backend = tagstore.flag_backend + else: + backend = tagstore.backend + try: - tagkey = tagstore.backend.get_tag_key( + tagkey = backend.get_tag_key( project.id, environment_id, lookup_key, @@ -54,7 +60,7 @@ def get(self, request: Request, project, key) -> Response: start, end = get_date_range_from_params(request.GET) - paginator = tagstore.backend.get_tag_value_paginator( + paginator = backend.get_tag_value_paginator( project.id, environment_id, tagkey.key, diff --git a/src/sentry/api/endpoints/project_tags.py b/src/sentry/api/endpoints/project_tags.py index 3389c62074051d..691e3fe68f2642 100644 --- a/src/sentry/api/endpoints/project_tags.py +++ b/src/sentry/api/endpoints/project_tags.py @@ -27,8 +27,16 @@ def get(self, request: Request, project) -> Response: if request.GET.get("onlySamplingTags") == "1": kwargs["denylist"] = DS_DENYLIST + # Flags also autocomplete. We switch our backend and also enforce the + # events dataset. Flags are limited to errors. 
+ use_flag_backend = request.GET.get("useFlagsBackend") == "1" + if use_flag_backend: + backend = tagstore.flag_backend + else: + backend = tagstore.backend + tag_keys = sorted( - tagstore.backend.get_tag_keys( + backend.get_tag_keys( project.id, environment_id, tenant_ids={"organization_id": project.organization_id}, @@ -48,7 +56,7 @@ def get(self, request: Request, project) -> Response: "key": tagstore.backend.get_standardized_key(tag_key.key), "name": tagstore.backend.get_tag_key_label(tag_key.key), "uniqueValues": tag_key.values_seen, - "canDelete": tag_key.key not in PROTECTED_TAG_KEYS, + "canDelete": tag_key.key not in PROTECTED_TAG_KEYS and not use_flag_backend, } ) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 5afdf18efe14ce..e0b651003f903a 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -1764,6 +1764,10 @@ def custom_parameter_sort(parameter: dict) -> tuple[str, int]: SENTRY_TAGSTORE = os.environ.get("SENTRY_TAGSTORE", "sentry.tagstore.snuba.SnubaTagStorage") SENTRY_TAGSTORE_OPTIONS: dict[str, Any] = {} +# Flag storage backend +SENTRY_FLAGSTORE = os.environ.get("SENTRY_FLAGSTORE", "sentry.tagstore.snuba.SnubaFlagStorage") +SENTRY_FLAGSTORE_OPTIONS: dict[str, Any] = {} + # Search backend SENTRY_SEARCH = os.environ.get( "SENTRY_SEARCH", "sentry.search.snuba.EventsDatasetSnubaSearchBackend" diff --git a/src/sentry/flags/docs/api.md b/src/sentry/flags/docs/api.md index d166b1ce3b9991..e16a5731ff1b23 100644 --- a/src/sentry/flags/docs/api.md +++ b/src/sentry/flags/docs/api.md @@ -205,103 +205,3 @@ Any request content-type is acceptable (JSON, XML, binary-formats) so long as th - Request - Response 201 - -## Flag Keys [/organizations//flags/keys/] - -- Parameters - - end (optional, string) - ISO 8601 format. Required if `start` is set. - - project (number) - The project to search. - - sort (string) - A field to sort by. Optionally prepended with a hyphen to indicate descending order. - - start (optional, string) - ISO 8601 format (`YYYY-MM-DDTHH:mm:ss.sssZ`) - - statsPeriod (optional, string) - A positive integer suffixed with a unit type. - - useCache (number) - Boolean number which determines if we should use the cache or not. 0 for false; 1 for true. - - cursor (optional, string) - - per_page (optional, number) - Default: 10 - - offset (optional, number) - Default: 0 - -**Attributes** - -| Column | Type | Description | -| ----------- | ------ | ----------- | -| key | string | | -| name | string | | -| totalValues | number | | - -### Browse Flag Keys [GET] - -Retrieve a collection of flag keys. - -- Response 200 - - ```json - [ - { - "key": "sdk_name", - "name": "Sdk Name", - "totalValues": 2444 - } - ] - ``` - -## Flag Key [/organizations//flags/keys//] - -### Fetch Flag Key [GET] - -Fetch a flag key. - -- Response 200 - - ```json - { - "key": "sdk_name", - "name": "Sdk Name", - "totalValues": 2444 - } - ``` - -## Flag Values [/organizations//flags/keys//values/] - -- Parameters - - end (optional, string) - ISO 8601 format. Required if `start` is set. - - project (number) - The project to search. - - sort (string) - A field to sort by. Optionally prepended with a hyphen to indicate descending order. - - start (optional, string) - ISO 8601 format (`YYYY-MM-DDTHH:mm:ss.sssZ`) - - statsPeriod (optional, string) - A positive integer suffixed with a unit type. - - useCache (number) - Boolean number which determines if we should use the cache or not. 0 for false; 1 for true. 
- - cursor (optional, string) - - per_page (optional, number) - Default: 10 - - offset (optional, number) - Default: 0 - -**Attributes** - -| Column | Type | Description | -| --------- | ------ | ----------- | -| key | string | | -| name | string | | -| value | string | | -| count | number | | -| lastSeen | string | | -| firstSeen | string | | - -### Browse Flag Values [GET] - -Retrieve a collection of flag values. - -- Response 200 - - ```json - [ - { - "key": "isCustomerDomain", - "name": "yes", - "value": "yes", - "count": 15525, - "lastSeen": "2025-01-22T15:59:13Z", - "firstSeen": "2025-01-21T15:59:02Z" - } - ] - ``` diff --git a/src/sentry/tagstore/__init__.py b/src/sentry/tagstore/__init__.py index c202b6c6a3e21a..241ade3a11560b 100644 --- a/src/sentry/tagstore/__init__.py +++ b/src/sentry/tagstore/__init__.py @@ -7,3 +7,9 @@ backend = LazyServiceWrapper(TagStorage, settings.SENTRY_TAGSTORE, settings.SENTRY_TAGSTORE_OPTIONS) backend.expose(locals()) + +# Searches the "flags" columns instead of "tags". +flag_backend = LazyServiceWrapper( + TagStorage, settings.SENTRY_FLAGSTORE, settings.SENTRY_FLAGSTORE_OPTIONS +) +flag_backend.expose(locals()) diff --git a/src/sentry/tagstore/snuba/__init__.py b/src/sentry/tagstore/snuba/__init__.py index 8e5b8504df9dd4..375b9ceeae1e6e 100644 --- a/src/sentry/tagstore/snuba/__init__.py +++ b/src/sentry/tagstore/snuba/__init__.py @@ -1 +1 @@ -from .backend import SnubaTagStorage # NOQA +from .backend import SnubaFlagStorage, SnubaTagStorage # NOQA diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py index 2b4df05b768462..37c00409ed6787 100644 --- a/src/sentry/tagstore/snuba/backend.py +++ b/src/sentry/tagstore/snuba/backend.py @@ -116,7 +116,7 @@ class _OptimizeKwargs(TypedDict, total=False): sample: int -class SnubaTagStorage(TagStorage): +class _SnubaTagStorage(TagStorage): def __get_tag_key_and_top_values( self, project_id, @@ -128,7 +128,7 @@ def __get_tag_key_and_top_values( tenant_ids=None, **kwargs, ): - tag = f"tags[{key}]" + tag = self.format_string.format(key) filters = {"project_id": get_project_list(project_id)} if environment_id: filters["environment"] = [environment_id] @@ -260,10 +260,10 @@ def __get_tag_keys_for_projects( dataset, filters = self.apply_group_filters(group, filters) if keys is not None: - filters["tags_key"] = sorted(keys) + filters[self.key_column] = sorted(keys) if include_values_seen: - aggregations.append(["uniq", "tags_value", "values_seen"]) + aggregations.append(["uniq", self.value_column, "values_seen"]) should_cache = use_cache and group is None result = None @@ -303,7 +303,7 @@ def __get_tag_keys_for_projects( dataset=dataset, start=start, end=end, - groupby=["tags_key"], + groupby=[self.key_column], conditions=[], filter_keys=filters, aggregations=aggregations, @@ -480,7 +480,9 @@ def __get_group_list_tag_value( filters = {"project_id": project_ids, "group_id": group_id_list} if environment_ids: filters["environment"] = environment_ids - conditions = (extra_conditions if extra_conditions else []) + [[f"tags[{key}]", "=", value]] + conditions = (extra_conditions if extra_conditions else []) + [ + [self.format_string.format(key), "=", value] + ] aggregations = (extra_aggregations if extra_aggregations else []) + [ ["count()", "", "times_seen"], ["min", SEEN_COLUMN, "first_seen"], @@ -537,7 +539,7 @@ def get_generic_group_list_tag_value( Condition(Column("group_id"), Op.IN, group_id_list), Condition(Column("timestamp"), Op.LT, end), Condition(Column("timestamp"), Op.GTE, 
start), - Condition(Column(f"tags[{key}]"), Op.EQ, value), + Condition(Column(self.format_string.format(key)), Op.EQ, value), ] if translated_params.get("environment"): Condition(Column("environment"), Op.IN, translated_params["environment"]), @@ -582,7 +584,7 @@ def apply_group_filters(self, group: Group | None, filters): return dataset, filters def get_group_tag_value_count(self, group, environment_id, key, tenant_ids=None): - tag = f"tags[{key}]" + tag = self.format_string.format(key) filters = {"project_id": get_project_list(group.project_id)} if environment_id: filters["environment"] = [environment_id] @@ -633,7 +635,7 @@ def get_group_tag_keys_and_top_values( if environment_ids: filters["environment"] = environment_ids if keys is not None: - filters["tags_key"] = keys + filters[self.key_column] = keys dataset, filters = self.apply_group_filters(group, filters) aggregations = kwargs.get("aggregations", []) aggregations += [ @@ -646,12 +648,12 @@ def get_group_tag_keys_and_top_values( dataset=dataset, start=kwargs.get("start"), end=kwargs.get("end"), - groupby=["tags_key", "tags_value"], + groupby=[self.key_column, self.value_column], conditions=conditions, filter_keys=filters, aggregations=aggregations, orderby="-count", - limitby=[value_limit, "tags_key"], + limitby=[value_limit, self.key_column], referrer="tagstore._get_tag_keys_and_top_values", tenant_ids=tenant_ids, ) @@ -1047,6 +1049,9 @@ def _get_tag_values_for_releases_across_all_datasets(self, projects, environment [(i, TagValue(RELEASE_ALIAS, v, None, None, None)) for i, v in enumerate(versions)] ) + def get_snuba_column_name(self, key: str, dataset: Dataset): + return snuba.get_snuba_column_name(key, dataset=dataset) + def get_tag_value_paginator_for_projects( self, projects, @@ -1077,7 +1082,7 @@ def get_tag_value_paginator_for_projects( if include_replays: dataset = Dataset.Replays - snuba_key = snuba.get_snuba_column_name(key, dataset=dataset) + snuba_key = self.get_snuba_column_name(key, dataset=dataset) # We cannot search the values of these columns like we do other columns because they are # a different type, and as such, LIKE and != do not work on them. Furthermore, because the @@ -1192,7 +1197,7 @@ def get_tag_value_paginator_for_projects( snuba_name = FIELD_ALIASES[USER_DISPLAY_ALIAS].get_field() snuba.resolve_complex_column(snuba_name, resolver, []) elif snuba_name in BLACKLISTED_COLUMNS: - snuba_name = f"tags[{key}]" + snuba_name = self.format_string.format(key) if query: query = query.replace("\\", "\\\\") @@ -1299,7 +1304,7 @@ def get_group_tag_value_iter( ) -> list[GroupTagValue]: filters = { "project_id": get_project_list(group.project_id), - "tags_key": [key], + self.key_column: [key], } dataset, filters = self.apply_group_filters(group, filters) @@ -1307,7 +1312,7 @@ def get_group_tag_value_iter( filters["environment"] = environment_ids results = snuba.query( dataset=dataset, - groupby=["tags_value"], + groupby=[self.value_column], filter_keys=filters, conditions=[], aggregations=[ @@ -1358,3 +1363,37 @@ def get_group_tag_value_paginator( [(int(getattr(gtv, score_field).timestamp() * 1000), gtv) for gtv in group_tag_values], reverse=desc, ) + + +class SnubaTagStorage(_SnubaTagStorage): + key_column = "tags_key" + value_column = "tags_value" + format_string = "tags[{}]" + + +# Quick and dirty overload to support flag aggregations. This probably deserves +# a better refactor for now we're just raising within the functions we don't want +# to support. 
This sort of refactor couples flags behavior to a lot of tags +# specific behavior. The interfaces are compatible and flags are basically tags +# just with a different column to live on. + + +class SnubaFlagStorage(_SnubaTagStorage): + key_column = "flags_key" + value_column = "flags_value" + format_string = "flags[{}]" + + def get_release_tags(self, *args, **kwargs): + raise NotImplementedError + + def __get_groups_user_counts(self, *args, **kwargs): + raise NotImplementedError + + def get_groups_user_counts(self, *args, **kwargs): + raise NotImplementedError + + def get_generic_groups_user_counts(self, *args, **kwargs): + raise NotImplementedError + + def get_snuba_column_name(self, key: str, dataset: Dataset): + return f"flags[{key}]" diff --git a/tests/sentry/api/endpoints/test_project_tagkey_values.py b/tests/sentry/api/endpoints/test_project_tagkey_values.py index 72d881b48ca7d8..7881b1ca0f0a0f 100644 --- a/tests/sentry/api/endpoints/test_project_tagkey_values.py +++ b/tests/sentry/api/endpoints/test_project_tagkey_values.py @@ -137,3 +137,39 @@ def test_start_end_query(self): assert response.status_code == 200 assert len(response.data) == 1 assert response.data[0]["value"] == "bar" + + def test_simple_flags(self): + project = self.create_project() + self.store_event( + data={ + "contexts": {"flags": {"values": [{"flag": "abc", "result": True}]}}, + "timestamp": before_now(seconds=1).isoformat(), + }, + project_id=project.id, + ) + self.store_event( + data={ + "contexts": {"flags": {"values": [{"flag": "abc", "result": False}]}}, + "timestamp": before_now(seconds=1).isoformat(), + }, + project_id=project.id, + ) + + self.login_as(user=self.user) + + url = reverse( + "sentry-api-0-project-tagkey-values", + kwargs={ + "organization_id_or_slug": project.organization.slug, + "project_id_or_slug": project.slug, + "key": "abc", + }, + ) + + response = self.client.get(url + "?useFlagsBackend=1") + assert response.status_code == 200 + assert len(response.data) == 2 + + results = sorted(response.data, key=lambda i: i["value"]) + assert results[0]["value"] == "false" + assert results[1]["value"] == "true" diff --git a/tests/snuba/api/endpoints/test_project_tags.py b/tests/snuba/api/endpoints/test_project_tags.py index 589c103522e167..87ddff9c57645b 100644 --- a/tests/snuba/api/endpoints/test_project_tags.py +++ b/tests/snuba/api/endpoints/test_project_tags.py @@ -72,3 +72,44 @@ def test_simple_remove_tags_in_denylist_remove_all_tags(self): data = {v["key"]: v for v in response.data} assert len(data) == 0 assert data == {} + + def test_simple_flags(self): + self.store_event( + data={ + "contexts": { + "flags": { + "values": [ + {"flag": "abc", "result": True}, + {"flag": "def", "result": False}, + ] + } + }, + "timestamp": before_now(minutes=1).isoformat(), + }, + project_id=self.project.id, + ) + self.store_event( + data={ + "contexts": { + "flags": { + "values": [ + {"flag": "abc", "result": False}, + ] + } + }, + "timestamp": before_now(minutes=1).isoformat(), + }, + project_id=self.project.id, + ) + + response = self.get_success_response( + self.project.organization.slug, self.project.slug, useFlagsBackend="1" + ) + + data = {v["key"]: v for v in response.data} + assert len(data) == 2 + + assert data["def"]["canDelete"] is False + assert data["def"]["uniqueValues"] == 1 + assert data["abc"]["canDelete"] is False + assert data["abc"]["uniqueValues"] == 2
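The refactor above parameterizes the shared Snuba query logic by three class attributes (`key_column`, `value_column`, `format_string`), so `SnubaFlagStorage` only swaps which columns the queries target. Below is a minimal sketch of that pattern; the class names are hypothetical stand-ins, and only the attribute values mirror the patch.

```python
# Sketch of the column-parameterization used by the _SnubaTagStorage subclasses.
# The classes here are illustrative stand-ins, not the real Sentry storages; the
# attribute values (tags_* vs flags_*) are the ones introduced by the patch.


class _BaseStorageSketch:
    key_column: str
    value_column: str
    format_string: str

    def top_values_query_parts(self, key: str) -> dict:
        # Shared logic reads the class attributes instead of hard-coding "tags_*",
        # which is the only change most methods in the patch needed.
        return {
            "groupby": [self.key_column, self.value_column],
            "condition_column": self.format_string.format(key),
        }


class TagStorageSketch(_BaseStorageSketch):
    key_column = "tags_key"
    value_column = "tags_value"
    format_string = "tags[{}]"


class FlagStorageSketch(_BaseStorageSketch):
    key_column = "flags_key"
    value_column = "flags_value"
    format_string = "flags[{}]"


if __name__ == "__main__":
    assert TagStorageSketch().top_values_query_parts("browser")["condition_column"] == "tags[browser]"
    assert FlagStorageSketch().top_values_query_parts("abc")["condition_column"] == "flags[abc]"
```

The same switch surfaces at the API layer: passing `useFlagsBackend=1` to the project tags and tag-key values endpoints routes the request through `tagstore.flag_backend` instead of `tagstore.backend`, as exercised by the tests above.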