Skip to content

Commit

Permalink
feat: Add secrets masking (#182)
Browse files Browse the repository at this point in the history
# Secrets Masking
This feature adds secrets masking to enable users to replace sensitive
values in the checkpoint with a `[secret]` mask.


![image](https://github.com/user-attachments/assets/390c06af-b401-4772-baa1-917490661950)

Fixes #156

## Summary

- Add new `ComponentOpts` type
- Add support for selective or broad masking of props via property
paths.
- Support broad masking of outputs via the "output" property path. 
- Support Components and StreamComponent
- Support transitive secrets masking across children
- Extensive tests (16 cases, 700 lines) across all the edge cases I
could think of

## Implementation

Secrets can be masked by adding props property paths, or the "output"
key to a new `ComponentOpts.secrets` array.

Masked secrets are tracked by value, masking is transitive across
children, and we search props and outputs to mask any value even if it
just contains the masked value ( either `contains` or `===` will result
in masking, but we are more selective with masking for contains). For
instance if the secret is `secretsecretsecret` and a child component
uses that value in a template string to output `this is a
secretsecretsecret` the child output will be masked as `this is a
[secret]`.

We enforce this transitive relationship via context. We extended the
parentId context that was added for checkpointing to include the entire
node. This gives us access to the hierarchy of previously masked values.
For every node that gets added to the graph, we walk it's ancestors
looking for secret values that need to be masked in it's inputs and
outputs.

Value-based masking plus transitive child masking gives as much safety
as possible, and minimizes the amount that a user needs to think about
masking secrets when writing and composing component.

## Using Secret Outputs

We've added a new `ComponentOpts` type. Components accept `defaultOpts`
at declaration time, and
users can specify a `componentOpts` prop at the time of instantiation to
mark additional props and outputs as secrets.

```tsx
// specify defaults at declaration time
export const OpenAIProvider = gsx.Component<ClientOptions, never>(
  "OpenAIProvider",
  (args) => {
    const client = new OpenAI(args);
    return <OpenAIContext.Provider value={{ client }} />;
  },
  {
    secrets: ["apiKey"],
  },
);

// specify additive secret props at the time you use a component. `apiKey` will still be secret in this case.
<OpenAIProvider apiKey={key} foo={...} componentOpts={secrets: ["foo"]}
```

To make a prop, a secret refer to it by property path in the array: 


```tsx
interface Inputs {
  foo: string;
  bar: { buzz: number };
  baz: number;
}

const secrets = ["bar.buzz", "baz"]
```

All outputs can be masked by adding the `output` path to the array.
Selective output masking is not currently supported:

```tsx
<Component componentOpts={secrets: ["output"]}
```

If the property path refers to a property that is an object or array,
all nested values within that object or array will be masked as
`[secret]`, but the keys and structure of the object/array will stay
intact, primarily to help with debugging and visualization of outputs.
  • Loading branch information
EvanBoyle authored Jan 28, 2025
1 parent d7c33f2 commit 17826a7
Show file tree
Hide file tree
Showing 7 changed files with 911 additions and 11 deletions.
3 changes: 3 additions & 0 deletions packages/gensx-openai/src/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ export const OpenAIProvider = gsx.Component<ClientOptions, never>(
const client = new OpenAI(args);
return <OpenAIContext.Provider value={{ client }} />;
},
{
secrets: ["apiKey"],
},
);

// Create a component for chat completions
Expand Down
279 changes: 277 additions & 2 deletions packages/gensx/src/checkpoint.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { join } from "node:path";

import { ComponentOpts, STREAMING_PLACEHOLDER } from "./component";

// Cross-platform UUID generation
function generateUUID(): string {
try {
Expand Down Expand Up @@ -32,6 +34,7 @@ export interface ExecutionNode {
};
[key: string]: unknown;
};
componentOpts?: ComponentOpts;
}

export interface CheckpointWriter {
Expand All @@ -48,13 +51,27 @@ export interface CheckpointWriter {
export class CheckpointManager implements CheckpointWriter {
private nodes = new Map<string, ExecutionNode>();
private orphanedNodes = new Map<string, Set<ExecutionNode>>();
private _secretValues = new Map<string, Set<unknown>>(); // Internal per-node secrets
private currentNodeChain: string[] = []; // Track current execution context
private readonly MIN_SECRET_LENGTH = 8;
public root?: ExecutionNode;
public checkpointsEnabled: boolean;

// Track active checkpoint write
private activeCheckpoint: Promise<void> | null = null;
private pendingUpdate = false;

// Provide unified view of all secrets
get secretValues(): Set<unknown> {
const allSecrets = new Set<unknown>();
for (const secrets of this._secretValues.values()) {
for (const secret of secrets) {
allSecrets.add(secret);
}
}
return allSecrets;
}

constructor() {
// Set checkpointsEnabled based on environment variable
// Environment variables are strings, so check for common truthy values
Expand Down Expand Up @@ -180,12 +197,16 @@ export class CheckpointManager implements CheckpointWriter {
const baseUrl =
process.env.GENSX_CHECKPOINT_URL ?? "http://localhost:3000";
const url = join(baseUrl, "api/execution");

// Create a deep copy of the execution tree for masking
const maskedRoot = this.maskExecutionTree(structuredClone(this.root));

const response = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(this.root),
body: JSON.stringify(maskedRoot),
});

if (!response.ok) {
Expand All @@ -199,6 +220,217 @@ export class CheckpointManager implements CheckpointWriter {
}
}

private maskExecutionTree(node: ExecutionNode): ExecutionNode {
// Mask props
node.props = this.scrubSecrets(node.props, node.id) as Record<
string,
unknown
>;

// Mask output if present
if (node.output !== undefined) {
node.output = this.scrubSecrets(node.output, node.id, "output");
}

// Mask metadata if present
if (node.metadata) {
node.metadata = this.scrubSecrets(
node.metadata,
node.id,
"metadata",
) as Record<string, unknown>;
}

// Recursively mask children
node.children = node.children.map(child => this.maskExecutionTree(child));

return node;
}

private isEqual(a: unknown, b: unknown): boolean {
// Handle primitives
if (a === b) return true;

// If either isn't an object, they're not equal
if (!a || !b || typeof a !== "object" || typeof b !== "object") {
return false;
}

// Handle arrays
if (Array.isArray(a) && Array.isArray(b)) {
return (
a.length === b.length &&
a.every((item, index) => this.isEqual(item, b[index]))
);
}

// Handle objects
if (!Array.isArray(a) && !Array.isArray(b)) {
const aKeys = Object.keys(a);
const bKeys = Object.keys(b);
return (
aKeys.length === bKeys.length &&
aKeys.every(key =>
this.isEqual(a[key as keyof typeof a], b[key as keyof typeof b]),
)
);
}

return false;
}

private withNode<T>(nodeId: string, fn: () => T): T {
this.currentNodeChain.push(nodeId);
try {
return fn();
} finally {
this.currentNodeChain.pop();
}
}

private getEffectiveSecrets(): Set<unknown> {
const allSecrets = new Set<unknown>();
// Collect secrets from current node and all ancestors
for (const nodeId of this.currentNodeChain) {
const nodeSecrets = this._secretValues.get(nodeId);
if (nodeSecrets) {
for (const secret of nodeSecrets) {
allSecrets.add(secret);
}
}
}
return allSecrets;
}

private registerSecrets(
props: Record<string, unknown>,
paths: string[],
nodeId: string,
) {
this.withNode(nodeId, () => {
// Initialize secrets set for this node
let nodeSecrets = this._secretValues.get(nodeId);
if (!nodeSecrets) {
nodeSecrets = new Set();
this._secretValues.set(nodeId, nodeSecrets);
}

// Use paths purely for collection
for (const path of paths) {
const value = this.getValueAtPath(props, path);
if (value !== undefined) {
this.collectSecretValues(value, nodeSecrets);
}
}
});
}

private collectSecretValues(
data: unknown,
nodeSecrets: Set<unknown>,
visited = new WeakSet(),
): void {
// Skip if already visited to prevent cycles
if (data && typeof data === "object") {
if (visited.has(data)) {
return;
}
visited.add(data);
}

// Handle primitive values
if (typeof data === "string") {
if (data.length >= this.MIN_SECRET_LENGTH) {
nodeSecrets.add(data);
}
return;
}

// Skip other primitives
if (!data || typeof data !== "object") {
return;
}

// Handle arrays and objects (excluding ArrayBuffer views)
if (Array.isArray(data) || !ArrayBuffer.isView(data)) {
const values = Array.isArray(data) ? data : Object.values(data);
values.forEach(value => {
this.collectSecretValues(value, nodeSecrets, visited);
});
}
}

private scrubSecrets(data: unknown, nodeId?: string, path = ""): unknown {
return this.withNode(nodeId ?? "", () => {
// Handle primitive values
if (typeof data === "string" || typeof data === "number") {
const strValue = String(data);
return this.scrubString(strValue);
}

// Skip other primitives
if (!data || typeof data !== "object") {
return data;
}

// Handle arrays
if (Array.isArray(data)) {
return data.map((item, index) =>
this.scrubSecrets(
item,
nodeId,
path ? `${path}.${index}` : `${index}`,
),
);
}

// Handle objects (excluding ArrayBuffer views)
if (!ArrayBuffer.isView(data)) {
return Object.fromEntries(
Object.entries(data).map(([key, value]) => [
key,
this.scrubSecrets(value, nodeId, path ? `${path}.${key}` : key),
]),
);
}

return data;
});
}

private scrubString(value: string): string {
const effectiveSecrets = this.getEffectiveSecrets();
let result = value;

// Sort secrets by length (longest first) to handle overlapping secrets correctly
const secrets = Array.from(effectiveSecrets)
.filter(s => typeof s === "string" && s.length >= this.MIN_SECRET_LENGTH)
.sort((a, b) => String(b).length - String(a).length);

// Replace each secret with [secret]
for (const secret of secrets) {
if (typeof secret === "string") {
// Only replace if the secret is actually in the string
if (result.includes(secret)) {
const escapedSecret = secret.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
const regex = new RegExp(escapedSecret, "g");
result = result.replace(regex, "[secret]");
}
}
}

return result;
}

private getValueAtPath(obj: Record<string, unknown>, path: string): unknown {
return path.split(".").reduce<unknown>((curr: unknown, key: string) => {
if (curr && typeof curr === "object") {
return (curr as Record<string, unknown>)[key];
}
return undefined;
}, obj);
}

/**
* Due to the async nature of component execution, nodes can arrive in any order.
* For example, in a tree like:
Expand Down Expand Up @@ -237,6 +469,12 @@ export class CheckpointManager implements CheckpointWriter {
...partialNode,
};

// Register any secrets from componentOpts
if (node.componentOpts?.secretProps) {
this.registerSecrets(node.props, node.componentOpts.secretProps, nodeId);
}

// Store raw values - masking happens at write time
this.nodes.set(node.id, node);

if (parentId) {
Expand Down Expand Up @@ -283,7 +521,24 @@ export class CheckpointManager implements CheckpointWriter {
const node = this.nodes.get(id);
if (node) {
node.endTime = Date.now();
// Store raw output - masking happens at write time
node.output = output;

// If output is marked as secret and not the streaming placeholder, collect secrets
if (
node.componentOpts?.secretOutputs &&
output !== STREAMING_PLACEHOLDER
) {
this.withNode(id, () => {
let nodeSecrets = this._secretValues.get(id);
if (!nodeSecrets) {
nodeSecrets = new Set();
this._secretValues.set(id, nodeSecrets);
}
this.collectSecretValues(output, nodeSecrets);
});
}

this.updateCheckpoint();
} else {
console.warn(`[Tracker] Attempted to complete unknown node:`, { id });
Expand All @@ -293,14 +548,34 @@ export class CheckpointManager implements CheckpointWriter {
addMetadata(id: string, metadata: Record<string, unknown>) {
const node = this.nodes.get(id);
if (node) {
node.metadata = { ...node.metadata, ...metadata };
// Store raw metadata - masking happens at write time
node.metadata = {
...node.metadata,
...metadata,
};
this.updateCheckpoint();
}
}

updateNode(id: string, updates: Partial<ExecutionNode>) {
const node = this.nodes.get(id);
if (node) {
// If output is being updated and it's marked as secret (and not the placeholder), collect secrets
if (
"output" in updates &&
node.componentOpts?.secretOutputs &&
updates.output !== STREAMING_PLACEHOLDER
) {
this.withNode(id, () => {
let nodeSecrets = this._secretValues.get(id);
if (!nodeSecrets) {
nodeSecrets = new Set();
this._secretValues.set(id, nodeSecrets);
}
this.collectSecretValues(updates.output, nodeSecrets);
});
}

Object.assign(node, updates);
this.updateCheckpoint();
} else {
Expand Down
Loading

0 comments on commit 17826a7

Please sign in to comment.