Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remote cache logger - new plugin to help debug remote cache #21294

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/docs/using-pants/key-concepts/backends.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,4 @@ The list of all backends (both stable and experimental) is also available via `p
| `pants.backend.experimental.visibility` | Enables `__dependencies_rules__` and `__dependents_rules__` | [Visibility](./targets-and-build-files.mdx#visibility) |
| `pants.backend.python.providers.experimental.pyenv` | Enables Pants to manage appropriate Python interpreters via pyenv | |
| `pants.backend.python.providers.experimental.pyenv.custom_install` | Enables customising how the pyenv provider builds a Python interpreter | |
| `pants.backend.tools.experimental.remote_cache_logger` | Enables the remote cache logger for debeugging remote cache
1 change: 1 addition & 0 deletions pants.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ backend_packages.add = [
"pants.backend.experimental.scala.lint.scalafix",
"pants.backend.experimental.scala.debug_goals",
"pants.backend.experimental.tools.workunit_logger",
"pants.backend.experimental.tools.remote_cache_logger",
"pants.backend.experimental.visibility",
"pants.backend.tools.preamble",
"pants.backend.tools.taplo",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
python_sources()
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

from pants.backend.tools.remote_cache_logger import rules as remote_cache_logger_rules


def rules():
return remote_cache_logger_rules.rules()
4 changes: 4 additions & 0 deletions src/python/pants/backend/tools/remote_cache_logger/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_sources()
Empty file.
87 changes: 87 additions & 0 deletions src/python/pants/backend/tools/remote_cache_logger/rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import csv
import logging
from typing import Any, Dict, Mapping, Tuple

from pants.engine.internals.scheduler import Workunit
from pants.engine.rules import collect_rules, rule
from pants.engine.streaming_workunit_handler import (
StreamingWorkunitContext,
WorkunitsCallback,
WorkunitsCallbackFactory,
WorkunitsCallbackFactoryRequest,
)
from pants.engine.unions import UnionRule
from pants.option.option_types import BoolOption, StrOption
from pants.option.subsystem import Subsystem
from pants.util.dirutil import safe_open

logger = logging.getLogger(__name__)


class RemoteCacheLoggerCallback(WorkunitsCallback):
"""Configuration for RemoteCacheLogger."""

def __init__(self, wulogger: "RemoteCacheLogger"):
self.wulogger = wulogger
self._completed_workunits: Dict[str, Mapping[str, Any]] = {}

@property
def can_finish_async(self) -> bool:
return False

def __call__(
self,
*,
completed_workunits: Tuple[Workunit, ...],
started_workunits: Tuple[Workunit, ...],
context: StreamingWorkunitContext,
finished: bool = False,
**kwargs: Any,
) -> None:
for wu in completed_workunits:
if wu["name"] == "remote_cache_read_speculation":
self._completed_workunits[wu["span_id"]] = {
"description": wu["metadata"]["user_metadata"]["request_description"],
"action_digest": wu["metadata"]["user_metadata"]["action_digest"],
"outcome": wu["metadata"]["user_metadata"]["outcome"],
"request": wu["metadata"]["user_metadata"]["request"],
}
if finished:
filepath = f"{self.wulogger.logdir}/{context.run_tracker.run_id}.csv"
with safe_open(filepath, "w", newline="") as f:
fieldnames = ["description", "action_digest", "outcome", "request"]
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(self._completed_workunits.values())
logger.info(f"Wrote log to {filepath}")


class RemoteCacheLoggerCallbackFactoryRequest:
"""A unique request type that is installed to trigger construction of our WorkunitsCallback."""


class RemoteCacheLogger(Subsystem):
options_scope = "remote-cache-logger"
help = "Remote Cache Logger subsystem. Useful for debugging remote cache."

enabled = BoolOption("--enabled", default=False, help="Whether to enable remote cache logging.")
logdir = StrOption("--logdir", default=".pants.d/workdir", help="Where to write the log to.")


@rule
def construct_callback(
_: RemoteCacheLoggerCallbackFactoryRequest,
wulogger: RemoteCacheLogger,
) -> WorkunitsCallbackFactory:
return WorkunitsCallbackFactory(
lambda: RemoteCacheLoggerCallback(wulogger) if wulogger.enabled else None
)


def rules():
return [
UnionRule(WorkunitsCallbackFactoryRequest, RemoteCacheLoggerCallbackFactoryRequest),
*collect_rules(),
]
1 change: 1 addition & 0 deletions src/python/pants/bin/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ target(
"src/python/pants/backend/experimental/tools/trufflehog",
"src/python/pants/backend/experimental/tools/workunit_logger",
"src/python/pants/backend/experimental/tools/yamllint",
"src/python/pants/backend/experimental/tools/remote_cache_logger",
"src/python/pants/backend/experimental/typescript",
"src/python/pants/backend/experimental/visibility",
"src/python/pants/backend/google_cloud_function/python",
Expand Down
51 changes: 51 additions & 0 deletions src/rust/engine/process_execution/remote/src/remote_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use process_execution::{
ProcessExecutionEnvironment, ProcessResultSource,
};
use process_execution::{make_execute_request, EntireExecuteRequest};
use workunit_store::UserMetadataItem;

// Consumers of this crate shouldn't need to worry about the exact crate structure that comes
// together to make a remote cache command runner.
Expand Down Expand Up @@ -318,6 +319,12 @@ impl CommandRunner {
in_workunit!(
"remote_cache_read_speculation",
Level::Trace,
user_metadata = vec![(
"action_digest".to_owned(),
UserMetadataItem::String(format!("{action_digest:?}"))),
("request_description".to_owned(), UserMetadataItem::String(request.description.clone())),
("request".to_owned(), UserMetadataItem::String(format!("{request:?}"))),
],
|workunit| async move {
tokio::select! {
cache_result = &mut cache_read_future => {
Expand All @@ -330,6 +337,24 @@ impl CommandRunner {
}
local_result = &mut local_execution_future => {
workunit.increment_counter(Metric::RemoteCacheSpeculationLocalCompletedFirst, 1);
workunit.update_metadata(|initial| {
initial.map(|(initial, _)| {
(
WorkunitMetadata {
user_metadata: initial
.user_metadata
.into_iter()
.chain(std::iter::once((
"outcome".to_owned(),
UserMetadataItem::String("remote miss".to_owned()),
)))
.collect(),
..initial
},
Level::Trace,
)
})
});
local_result.map(|res| (res, false))
}
}
Expand Down Expand Up @@ -366,6 +391,14 @@ impl CommandRunner {
(
WorkunitMetadata {
desc: initial.desc.as_ref().map(|desc| format!("Hit: {desc}")),
user_metadata: initial
.user_metadata
.into_iter()
.chain(std::iter::once((
"outcome".to_owned(),
UserMetadataItem::String("remote hit".to_owned()),
)))
.collect(),
..initial
},
Level::Debug,
Expand All @@ -374,6 +407,24 @@ impl CommandRunner {
});
Ok((cached_response, true))
} else {
workunit.update_metadata(|initial| {
initial.map(|(initial, _)| {
(
WorkunitMetadata {
user_metadata: initial
.user_metadata
.into_iter()
.chain(std::iter::once((
"outcome".to_owned(),
UserMetadataItem::String("remote miss".to_owned()),
)))
.collect(),
..initial
},
Level::Trace,
)
})
});
// Note that we don't increment a counter here, as there is nothing of note in this
// scenario: the remote cache did not save unnecessary local work, nor was the remote
// trip unusually slow such that local execution was faster.
Expand Down
Loading