Strip leading slashes for sampling config (#809)

* strip leading slashes for sampling config * stripe whitespace * stripe leading slash in web requests and celery tests
scoutapp · Jan 15, 2025 · b5b7f3a · b5b7f3a
1 parent 226d564
commit b5b7f3a
Show file tree

Hide file tree

Showing 5 changed files with 26 additions and 14 deletions.
diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py
@@ -287,6 +287,10 @@ def value(self, key: str) -> None:
         return None
 
 
+def _strip_leading_slash(path: str) -> str:
+    return path.lstrip(" /").strip()
+
+
 def convert_to_bool(value: Any) -> bool:
     if isinstance(value, bool):
         return value
@@ -339,14 +343,22 @@ def convert_to_list(value: Any) -> List[Any]:
     return []
 
 
+def convert_ignore_paths(value: Any) -> List[str]:
+    """
+    Removes leading slashes from paths and returns a list of strings.
+    """
+    raw_paths = convert_to_list(value)
+    return [_strip_leading_slash(path) for path in raw_paths]
+
+
 def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, int]:
     """
     Converts endpoint sampling configuration from string or dict format
     to a normalized dict.
     Example: '/endpoint:40,/test:0' -> {'/endpoint': 40, '/test': 0}
     """
     if isinstance(value, dict):
-        return {k: int(v) for k, v in value.items()}
+        return {_strip_leading_slash(k): int(v) for k, v in value.items()}
     if isinstance(value, str):
         if not value.strip():
             return {}
@@ -362,7 +374,7 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in
                         "Must be between 0 and 100."
                     )
                     continue
-                result[endpoint.strip()] = rate_int
+                result[_strip_leading_slash(endpoint)] = rate_int
             except ValueError:
                 logger.warning(f"Invalid sampling configuration: {pair}")
                 continue
@@ -375,9 +387,9 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in
     "core_agent_download": convert_to_bool,
     "core_agent_launch": convert_to_bool,
     "disabled_instruments": convert_to_list,
-    "ignore": convert_to_list,
-    "ignore_endpoints": convert_to_list,
-    "ignore_jobs": convert_to_list,
+    "ignore": convert_ignore_paths,
+    "ignore_endpoints": convert_ignore_paths,
+    "ignore_jobs": convert_ignore_paths,
     "monitor": convert_to_bool,
     "sample_rate": convert_sample_rate,
     "sample_endpoints": convert_endpoint_sampling,

diff --git a/src/scout_apm/core/web_requests.py b/src/scout_apm/core/web_requests.py
@@ -96,7 +96,7 @@ def filter_element(key, value):
 def ignore_path(path):
     ignored_paths = scout_config.value("ignore")
     for ignored in ignored_paths:
-        if path.startswith(ignored):
+        if path.lstrip(" /").startswith(ignored):
             return True
     return False
 

diff --git a/tests/integration/test_celery.py b/tests/integration/test_celery.py
@@ -71,7 +71,7 @@ def crash(foo, spam=None):
 def test_configuration_copied():
     celery_config = SimpleNamespace(SCOUT_IGNORE=["/foobar/"])
     with app_with_scout(celery_config=celery_config):
-        assert scout_config.value("ignore") == ["/foobar/"]
+        assert scout_config.value("ignore") == ["foobar/"]
 
 
 def test_hello_eager(tracked_requests):

diff --git a/tests/unit/core/test_config.py b/tests/unit/core/test_config.py
@@ -266,17 +266,17 @@ def test_sample_rate_conversion_from_python(original, converted):
 def test_endpoint_sampling_conversion_from_env():
     config = ScoutConfig()
     with mock.patch.dict(
-        os.environ, {"SCOUT_SAMPLE_ENDPOINTS": "/endpoint:40,/test:0"}
+        os.environ, {"SCOUT_SAMPLE_ENDPOINTS": " /endpoint:40,/test:0"}
     ):
         value = config.value("sample_endpoints")
     assert isinstance(value, dict)
-    assert value == {"/endpoint": 40, "/test": 0}
+    assert value == {"endpoint": 40, "test": 0}
 
 
 @pytest.mark.parametrize(
     "original, converted",
     [
-        ("/endpoint:40,/test:0", {"/endpoint": 40, "/test": 0}),
+        ("/endpoint:40,/test:0", {"endpoint": 40, "test": 0}),
         ({"endpoint": 40, "test": 0}, {"endpoint": 40, "test": 0}),
         ("", {}),
         (object(), {}),

diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py
@@ -15,15 +15,15 @@ def config():
         sample_rate=50,  # 50% global sampling
         sample_endpoints={
             "users/test": 0,  # Never sample specific endpoint
-            "users": 100,  # Always sample
+            "/users": 100,  # Always sample
             "test": 20,  # 20% sampling for test endpoints
-            "health": 0,  # Never sample health checks
+            "/health": 0,  # Never sample health checks
         },
         sample_jobs={
             "critical-job": 100,  # Always sample
-            "batch": 30,  # 30% sampling for batch jobs
+            "/batch": 30,  # 30% sampling for batch jobs
         },
-        ignore_endpoints=["metrics", "ping"],
+        ignore_endpoints=["/metrics", "ping"],
         ignore_jobs=["test-job"],
         endpoint_sample_rate=70,  # 70% sampling for unspecified endpoints
         job_sample_rate=40,  # 40% sampling for unspecified jobs