diff --git a/datadog/util/format.py b/datadog/util/format.py
index b325d9690..a29edb231 100644
--- a/datadog/util/format.py
+++ b/datadog/util/format.py
@@ -30,4 +30,4 @@ def force_to_epoch_seconds(epoch_sec_or_dt):
 
 
 def normalize_tags(tag_list):
-    return [re.sub(TAG_INVALID_CHARS_RE, TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
+    return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
diff --git a/tests/performance/test_statsd_throughput.py b/tests/performance/test_statsd_throughput.py
index ef548c47d..59c45411c 100644
--- a/tests/performance/test_statsd_throughput.py
+++ b/tests/performance/test_statsd_throughput.py
@@ -6,8 +6,11 @@
 # Copyright 2015-Present Datadog, Inc
 
 # stdlib
+import cProfile
+import io
 import logging
 import os
+import pstats
 import random
 import sys
 import threading
@@ -22,6 +25,7 @@
 
 # datadog
 from datadog.dogstatsd.base import DogStatsd
+from datadog.util.compat import is_p3k
 
 # test utils
 from tests.util.fake_statsd_server import FakeServer
@@ -121,6 +125,7 @@ def setUp(self):
             os.getenv("BENCHMARK_NUM_THREADS", str(self.DEFAULT_NUM_THREADS))
         )
         self.num_runs = int(os.getenv("BENCHMARK_NUM_RUNS", str(self.DEFAULT_NUM_RUNS)))
+        self.profiling_enabled = os.getenv("BENCHMARK_PROFILING", "false") in ["1", "true", "True", "Y", "yes", "Yes"]
         self.transport = os.getenv(
             "BENCHMARK_TRANSPORT", str(self.DEFAULT_TRANSPORT)
         ).upper()
@@ -142,11 +147,12 @@ def one_line_warning(message, category, filename, lineno, *_):
     # pylint: disable=too-many-locals
     def test_statsd_performance(self):
         print(
-            "Starting: {} run(s), {} threads, {} points/thread via {}...".format(
+            "Starting: {} run(s), {} threads, {} points/thread via {} (profiling: {})...".format(
                 self.num_runs,
                 self.num_threads,
                 self.num_datapoints,
                 self.transport,
+                str(self.profiling_enabled).lower(),
             )
         )
 
@@ -250,6 +256,7 @@ def _execute_test_run(self, server, metrics_order, num_threads, num_datapoints):
                     start_signal,
                     metrics_order[thread_idx],
                     latency_results,
+                    self.profiling_enabled,
                 ),
             )
             thread.daemon = True
@@ -311,11 +318,19 @@ def _execute_test_run(self, server, metrics_order, num_threads, num_datapoints):
 
     @staticmethod
     def _thread_runner(
-        statsd_instance, start_event, thread_metrics_order, latency_results
+        statsd_instance,
+        start_event,
+        thread_metrics_order,
+        latency_results,
+        profiling_enabled,
     ):
         # We wait for a global signal to start running our events
         start_event.wait(5)
 
+        if profiling_enabled:
+            profiler = cProfile.Profile()
+            profiler.enable()
+
         duration = 0.0
         for metric_idx, metric in enumerate(thread_metrics_order):
             start_time = timeit.default_timer()
@@ -328,3 +343,31 @@ def _thread_runner(
             statsd_instance.flush()
 
         latency_results.put(duration)
+
+        if profiling_enabled:
+            TestDogStatsdThroughput.print_profiling_stats(profiler)
+
+
+    @staticmethod
+    def print_profiling_stats(profiler, sort_by='cumulative'):
+        """
+        Prints profiling results for a thread once it finishes its run. The
+        output is sorted by `sort_by`; 'tottime', 'pcalls', 'ncalls', and
+        'cumulative' are common choices, and the full list is available at
+        https://github.com/python/cpython/blob/3.9/Lib/pstats.py#L37-L45.
+        """
+
+        profiler.disable()
+
+        if is_p3k():
+            output_stream = io.StringIO()
+        else:
+            output_stream = io.BytesIO()
+
+        profiling_stats = pstats.Stats(
+            profiler,
+            stream=output_stream,
+        ).sort_stats(sort_by)
+
+        profiling_stats.print_stats()
+        print(output_stream.getvalue())
diff --git a/tests/unit/util/test_format.py b/tests/unit/util/test_format.py
index 554718a6d..1d3d7cb36 100644
--- a/tests/unit/util/test_format.py
+++ b/tests/unit/util/test_format.py
@@ -1,9 +1,10 @@
+# coding: utf8
 # Unless explicitly stated otherwise all files in this repository are licensed under the BSD-3-Clause License.
 # This product includes software developed at Datadog (https://www.datadoghq.com/).
 # Copyright 2015-Present Datadog, Inc
 import pytest
 
-from datadog.util.format import construct_url
+from datadog.util.format import construct_url, normalize_tags
 
 
 class TestConstructURL:
@@ -30,3 +31,21 @@ class TestConstructURL:
     @pytest.mark.parametrize("host,api_version,path,expected", test_data)
     def test_construct_url(self, host, api_version, path, expected):
         assert construct_url(host, api_version, path) == expected
+
+
+class TestNormalizeTags:
+    """
+    Test of the format module's `normalize_tags` functionality
+    """
+    test_data = [
+        (['this is a tag'], ['this_is_a_tag']),
+        (['abc!@#$%^&*()0987654321{}}{'], ['abc__________0987654321____']),
+        (['abc!@#', '^%$#3456#'], ['abc___', '____3456_']),
+        (['multiple', 'tags', 'included'], ['multiple', 'tags', 'included']),
+        ([u'абвгдежзийкл', u'абв', 'test123'], [u'абвгдежзийкл', u'абв', 'test123']),
+        ([u'абвгд西😃ежзийкл', u'аб😃西в', u'a😃😃b'], [u'абвгд西_ежзийкл', u'аб_西в', u'a__b']),
+    ]
+
+    @pytest.mark.parametrize("original_tags,expected_tags", test_data)
+    def test_normalize_tags(self, original_tags, expected_tags):
+        assert normalize_tags(original_tags) == expected_tags
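
For context, a minimal self-contained sketch of the cProfile/pstats pattern that the new print_profiling_stats() helper wraps. This is not part of the patch: expensive_work() is a made-up stand-in for the benchmark's per-thread send loop, and the print_stats(10) cap on output rows is optional (the helper itself prints everything).

    import cProfile
    import io
    import pstats

    def expensive_work():
        # Stand-in for the per-thread metric submission loop being profiled
        return sum(i * i for i in range(100000))

    profiler = cProfile.Profile()
    profiler.enable()
    expensive_work()
    profiler.disable()

    # Accumulate the stats into an in-memory stream, sort them the same way
    # as the helper's default ('cumulative'), and print the top 10 entries
    output_stream = io.StringIO()
    pstats.Stats(profiler, stream=output_stream).sort_stats('cumulative').print_stats(10)
    print(output_stream.getvalue())

In the benchmark itself, the same flow is toggled per thread via the BENCHMARK_PROFILING environment variable read in setUp() above.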