Skip to content

Commit

Permalink
feat: add tunnel cert rotation
Browse files Browse the repository at this point in the history
  • Loading branch information
MasterPtato committed Jan 8, 2025
1 parent 38e3ebd commit 96a17b3
Show file tree
Hide file tree
Showing 46 changed files with 1,151 additions and 69 deletions.
5 changes: 4 additions & 1 deletion Cargo.toml

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions packages/api/provision/src/route/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use uuid::Uuid;

pub mod datacenters;
pub mod servers;
pub mod tunnel;

define_router! {
routes: {
Expand All @@ -28,5 +29,11 @@ define_router! {
internal_endpoint: true,
),
},

"tunnel" / "tls": {
GET: tunnel::tls(
internal_endpoint: true,
),
}
},
}
24 changes: 24 additions & 0 deletions packages/api/provision/src/route/tunnel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use api_helper::{anchor::WatchIndexQuery, ctx::Ctx};
use rivet_api::models;
use rivet_operation::prelude::*;

use crate::auth::Auth;

// MARK: GET /tunnel/tls
/// Handler for `GET /tunnel/tls`.
///
/// Checks `ctx.auth().server()` first and bails out if it fails, then runs the
/// `cluster::ops::tunnel::tls_get` operation and combines its certificate and
/// private key with the root CA certificate read from the server TLS config.
///
/// `_watch_index` is accepted as part of the signature but is not used.
pub async fn tls(
	ctx: Ctx<Auth>,
	_watch_index: WatchIndexQuery,
) -> GlobalResult<models::ProvisionTunnelGetTlsResponse> {
	ctx.auth().server()?;

	// Tunnel cert + key come from the cluster operation
	let tunnel_tls = ctx.op(cluster::ops::tunnel::tls_get::Input {}).await?;

	// Root CA cert comes from the static server config
	let root_ca_cert_pem = ctx
		.config()
		.server()?
		.tls()?
		.root_ca_cert_pem
		.read()
		.clone();

	let response = models::ProvisionTunnelGetTlsResponse {
		cert_pem: tunnel_tls.cert_pem.clone(),
		root_ca_cert_pem,
		private_key_pem: tunnel_tls.private_key_pem.clone(),
	};

	Ok(response)
}
3 changes: 1 addition & 2 deletions packages/common/config/src/config/server/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,7 @@ pub struct CloudflareZone {
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Tls {
pub root_ca_cert_pem: Secret<String>,
pub cert_locally_signed_job_cert_pem: Secret<String>,
pub cert_locally_signed_job_key_pem: Secret<String>,
pub root_ca_key_pem: Secret<String>,
pub acme: TlsAcme,
}

Expand Down
16 changes: 16 additions & 0 deletions packages/common/service-manager/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ impl ServiceKind {
Cron(config) => ServiceBehavior::Cron(config.clone()),
}
}

/// Reports whether `self` and `other` are the same kind of service.
///
/// Only the variant is compared: two `Cron` kinds count as equal no matter
/// which configs they carry. Any pairing not listed below is unequal.
pub fn eq(&self, other: &Self) -> bool {
	use ServiceKind::*;

	matches!(
		(self, other),
		(ApiPublic, ApiPublic)
			| (ApiEdge, ApiEdge)
			| (ApiPrivate, ApiPrivate)
			| (Standalone, Standalone)
			| (Singleton, Singleton)
			| (Oneshot, Oneshot)
			| (Core, Core)
			| (Cron(_), Cron(_))
	)
}
}

/// Defines how a service should be run.
Expand Down
12 changes: 6 additions & 6 deletions packages/infra/server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ colored_json = "5.0.0"
global-error.workspace = true
include_dir = "0.7.4"
indoc = "2.0.5"
reqwest = "0.12.9"
rivet-api.workspace = true
rivet-migrate.workspace = true
rivet-pools.workspace = true
Expand Down Expand Up @@ -58,6 +59,7 @@ workflow-gc.workspace = true
workflow-metrics-publish.workspace = true

# Cron
cluster-tunnel-tls-renew.workspace = true
telemetry-beacon.workspace = true
user-delete-pending.workspace = true

Expand All @@ -67,16 +69,14 @@ api-monolith-public.workspace = true

# Oneshot
build-default-create.workspace = true
chirp-client.workspace = true
chirp-workflow.workspace = true
cloud-default-create.workspace = true
cluster-default-update.workspace = true
pegboard-dc-init.workspace = true
rivet-config.workspace = true
reqwest = "0.12.9"
chirp-client.workspace = true
rivet-cache.workspace = true
chirp-workflow.workspace = true
rivet-config.workspace = true
rivet-connection.workspace = true
cloud-default-create.workspace = true


[dependencies.sqlx]
workspace = true
Expand Down
2 changes: 1 addition & 1 deletion packages/infra/server/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl Opts {
run_config
.services
.iter()
.filter(|x| service_kinds.iter().any(|y| *y == x.kind))
.filter(|x| service_kinds.iter().any(|y| y.eq(&x.kind)))
.cloned()
.collect::<Vec<_>>()
};
Expand Down
9 changes: 9 additions & 0 deletions packages/infra/server/src/run_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ pub fn config(rivet_config: rivet_config::Config) -> Result<RunConfigData> {
ServiceKind::Singleton,
|config, pools| Box::pin(cluster_datacenter_tls_renew::start(config, pools)),
));

services.push(Service::new(
"cluster_tunnel_tls_renew",
ServiceKind::Cron(CronConfig {
run_immediately: true,
schedule: "0 0 0 1 * *".into(),
}),
|config, pools| Box::pin(cluster_tunnel_tls_renew::start(config, pools)),
));
}

if server_config.rivet.auth.access_kind == rivet_config::config::rivet::AccessKind::Development
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Holds the TLS certificate and private key for the tunnel, together with the
-- state of that cert and its expiration.
-- cert_pem and private_key_pem are nullable; presumably they stay NULL until the
-- cert has actually been created (TODO confirm against the writer of this table).
CREATE TABLE tunnel_tls (
_id INT PRIMARY KEY, -- Solely to allow ON CONFLICT, there should only be 1 row in this table
cert_pem TEXT,
private_key_pem TEXT,
state INT NOT NULL, -- cluster::types::TlsState
expire_ts INT NOT NULL -- Expiration timestamp; unit is not visible here (TODO confirm)
);
1 change: 1 addition & 0 deletions packages/services/cluster/src/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ pub mod get_for_game;
pub mod list;
pub mod resolve_for_name_id;
pub mod server;
pub mod tunnel;
1 change: 1 addition & 0 deletions packages/services/cluster/src/ops/tunnel/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod tls_get;
32 changes: 32 additions & 0 deletions packages/services/cluster/src/ops/tunnel/tls_get.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use chirp_workflow::prelude::*;

use crate::types::TlsState;

/// Input for the tunnel TLS get operation. It has no fields: the operation
/// takes no parameters.
#[derive(Debug)]
pub struct Input {}

/// Result of the tunnel TLS get operation, populated from the `cert_pem` and
/// `private_key_pem` columns of `db_cluster.tunnel_tls`.
#[derive(Debug)]
pub struct Output {
/// Value of the `cert_pem` column.
pub cert_pem: String,
/// Value of the `private_key_pem` column.
pub private_key_pem: String,
}

/// Reads the tunnel TLS certificate and private key from
/// `db_cluster.tunnel_tls`, ignoring any row whose `state` equals
/// `TlsState::Creating`.
///
/// When no such row exists, `unwrap!` fails with the message
/// "tunnel tls not created yet" (presumably as an error return rather than a
/// panic — confirm against the `unwrap!` macro).
///
/// NOTE(review): the function is named `cluster_datacenter_tls_get` although it
/// lives in `ops::tunnel::tls_get` and queries tunnel TLS data. This looks like
/// a copy-paste name; confirm what `#[operation]` derives from the function
/// name before renaming. `input` is currently unused.
#[operation]
pub async fn cluster_datacenter_tls_get(ctx: &OperationCtx, input: &Input) -> GlobalResult<Output> {
// The fetched row is optional; the `unwrap!` below handles the missing case.
let row = sql_fetch_optional!(
[ctx, (String, String)]
"
SELECT cert_pem, private_key_pem
FROM db_cluster.tunnel_tls
WHERE state != $1
",
// Bound as $1: rows still in the `Creating` state are excluded.
TlsState::Creating as i64,
)
.await?;
let (cert_pem, private_key_pem) = unwrap!(row, "tunnel tls not created yet");

Ok(Output {
cert_pem,
private_key_pem,
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,40 @@ pub fn fetch_info(server_token: &str) -> GlobalResult<String> {
))
}

pub fn fetch_tls(
pub fn fetch_tunnel_tls(
initialize_immediately: bool,
server_token: &str,
traefik_instance_name: &str,
) -> GlobalResult<String> {
let mut script = include_str!("../files/rivet_fetch_tunnel_tls.sh")
.replace("__TRAEFIK_INSTANCE_NAME__", traefik_instance_name)
.replace("__SERVER_TOKEN__", server_token)
.replace(
"__TUNNEL_API_EDGE_API__",
&format!("http://127.0.0.1:{TUNNEL_API_EDGE_PORT}"),
);

if initialize_immediately {
// Start timer & run script immediately
script.push_str(indoc!(
"
systemctl start rivet_fetch_tunnel_tls.timer
systemctl start --no-block rivet_fetch_tunnel_tls.service
"
));
}

Ok(script)
}

pub fn fetch_gg_tls(
initialize_immediately: bool,
server_token: &str,
traefik_instance_name: &str,
datacenter_id: Uuid,
) -> GlobalResult<String> {
let mut script = include_str!("../files/rivet_fetch_tls.sh")
.replace("__NAME__", traefik_instance_name)
let mut script = include_str!("../files/rivet_fetch_gg_tls.sh")
.replace("__TRAEFIK_INSTANCE_NAME__", traefik_instance_name)
.replace("__SERVER_TOKEN__", server_token)
.replace(
"__TUNNEL_API_EDGE_API__",
Expand All @@ -41,8 +67,8 @@ pub fn fetch_tls(
// Start timer & run script immediately
script.push_str(indoc!(
"
systemctl start rivet_fetch_tls.timer
systemctl start --no-block rivet_fetch_tls.service
systemctl start rivet_fetch_gg_tls.timer
systemctl start --no-block rivet_fetch_gg_tls.service
"
));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pub fn install() -> String {
include_str!("../files/traefik.sh").to_string()
}

#[derive(Clone)]
pub struct TlsCert {
pub cert_pem: String,
pub key_pem: String,
Expand All @@ -71,17 +72,18 @@ pub struct Instance {
pub struct ServerTransport {
pub server_name: String,
pub root_cas: Vec<String>,
/// IMPORTANT: Make sure the first cert is always the tunnel cert.
pub certs: Vec<TlsCert>,
}

/// Creates a Traefik instance.
///
/// Requires `install()`.
pub fn instance(config: Instance) -> String {
pub fn instance(config: Instance) -> GlobalResult<String> {
let config_name = &config.name;

let mut script = include_str!("../files/traefik_instance.sh")
.replace("__NAME__", &config.name)
.replace("__TRAEFIK_INSTANCE_NAME__", &config.name)
.replace("__STATIC_CONFIG__", &config.static_config)
.replace("__DYNAMIC_CONFIG__", &config.dynamic_config);

Expand Down Expand Up @@ -150,10 +152,14 @@ pub fn instance(config: Instance) -> String {
));
}

script
Ok(script)
}

pub fn tunnel(config: &rivet_config::Config, name: &str) -> GlobalResult<String> {
pub fn tunnel(
config: &rivet_config::Config,
name: &str,
tunnel_cert: &TlsCert,
) -> GlobalResult<String> {
// Build transports for each service
let tls_config = &config.server()?.tls()?;
let mut tcp_server_transports = HashMap::new();
Expand All @@ -163,20 +169,17 @@ pub fn tunnel(config: &rivet_config::Config, name: &str) -> GlobalResult<String>
ServerTransport {
server_name: format!("{name}.tunnel.rivet.gg"),
root_cas: vec![tls_config.root_ca_cert_pem.read().clone()],
certs: vec![TlsCert {
cert_pem: tls_config.cert_locally_signed_job_cert_pem.read().clone(),
key_pem: tls_config.cert_locally_signed_job_key_pem.read().clone(),
}],
certs: vec![tunnel_cert.clone()],
},
);
}

Ok(instance(Instance {
instance(Instance {
name: name.to_string(),
static_config: tunnel_static_config(),
dynamic_config: tunnel_dynamic_config(&config.server()?.rivet.tunnel.public_host),
tcp_server_transports,
}))
})
}

fn tunnel_static_config() -> String {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# Create dir to hold TLS certs
#
# The Traefik install script also creates these directories (and chown them),
# but we need the dirs to exist for the rivet_fetch_tls.sh script to run before
# but we need the dirs to exist for the rivet_fetch_gg_tls.sh script to run before
# Traefik is installed when using initialize_immediately.
mkdir -p /etc/__NAME__/dynamic/tls /etc/__NAME__/tls
mkdir -p /etc/__TRAEFIK_INSTANCE_NAME__/dynamic/tls /etc/__TRAEFIK_INSTANCE_NAME__/tls

# Write script
cat << 'EOF' > /usr/bin/rivet_fetch_tls.sh
cat << 'EOF' > /usr/bin/rivet_fetch_gg_tls.sh
#!/usr/bin/env bash
set -eu -o pipefail
CERT_ID="job"
STUB="/etc/__NAME__/tls/$CERT_ID"
STUB="/etc/__TRAEFIK_INSTANCE_NAME__/tls/$CERT_ID"
# Retry script every 5 seconds until success
Expand All @@ -31,20 +31,20 @@ echo $response | jq -r .job_cert_pem > "${STUB}_cert.pem"
echo $response | jq -r .job_private_key_pem > "${STUB}_key.pem"
# Write traefik config file
cat << EOF2 > "/etc/__NAME__/dynamic/tls/${CERT_ID}.toml"
cat << EOF2 > "/etc/__TRAEFIK_INSTANCE_NAME__/dynamic/tls/${CERT_ID}.toml"
[[tls.certificates]]
certFile = "${STUB}_cert.pem"
keyFile = "${STUB}_key.pem"
EOF2
# Force config reload
touch /etc/__NAME__/dynamic
touch /etc/__TRAEFIK_INSTANCE_NAME__/dynamic
EOF

chmod +x /usr/bin/rivet_fetch_tls.sh
chmod +x /usr/bin/rivet_fetch_gg_tls.sh

# Create systemd service file
cat << 'EOF' > /etc/systemd/system/rivet_fetch_tls.service
cat << 'EOF' > /etc/systemd/system/rivet_fetch_gg_tls.service
[Unit]
Description=Rivet TLS Fetch
Requires=network-online.target
Expand All @@ -54,14 +54,14 @@ After=network-online.target
User=root
Group=root
Type=oneshot
ExecStart=/usr/bin/rivet_fetch_tls.sh
ExecStart=/usr/bin/rivet_fetch_gg_tls.sh
[Install]
WantedBy=multi-user.target
EOF

# Create systemd timer file
cat << 'EOF' > /etc/systemd/system/rivet_fetch_tls.timer
cat << 'EOF' > /etc/systemd/system/rivet_fetch_gg_tls.timer
[Unit]
Description=Runs TLS fetch every minute
Requires=network-online.target
Expand All @@ -74,7 +74,7 @@ OnBootSec=0
OnCalendar=*:0
# Prevent stampeding herd
RandomizedDelaySec=60
Unit=rivet_fetch_tls.service
Unit=rivet_fetch_gg_tls.service
# Real time service
CPUSchedulingPolicy=fifo
Expand All @@ -89,5 +89,5 @@ EOF

# Enable tls fetch script to run on reboot
systemctl daemon-reload
systemctl enable rivet_fetch_tls.timer
systemctl enable rivet_fetch_tls.service
systemctl enable rivet_fetch_gg_tls.timer
systemctl enable rivet_fetch_gg_tls.service
Loading

0 comments on commit 96a17b3

Please sign in to comment.