diff --git a/crates/stackable-operator/CHANGELOG.md b/crates/stackable-operator/CHANGELOG.md index 433bec2b..76082c78 100644 --- a/crates/stackable-operator/CHANGELOG.md +++ b/crates/stackable-operator/CHANGELOG.md @@ -9,19 +9,22 @@ All notable changes to this project will be documented in this file. - BREAKING: Aggregate emitted Kubernetes events on the CustomResources thanks to the new [kube feature](https://github.com/kube-rs/controller-rs/pull/116). Instead of reporting the same event multiple times it now uses `EventSeries` to aggregate these events to single entry with an - age like `3s (x11 over 53s)` ([#867]): + age like `3s (x11 over 53s)` ([#938]): - The `report_controller_error` function now needs to be async. - It now takes `Recorder` as a parameter instead of a `Client`. - The `Recorder` instance needs to be available across all `reconcile` invocations, to ensure aggregation works correctly. - The operator needs permission to `patch` events (previously only `create` was needed). +- Add `ProductSpecificCommonConfig`, so that product operators can have custom fields within `commonConfig`. + Also add a `JavaCommonConfig`, which can be used by JVM-based tools to offer `jvmArgumentOverrides` with this mechanism ([#931]) ### Changed -- BREAKING: Bump Rust dependencies to enable Kubernetes 1.32 (via `kube` 0.98.0 and `k8s-openapi` - 0.23.0) ([#867]). +- BREAKING: Bump Rust dependencies to enable Kubernetes 1.32 (via `kube` 0.98.0 and `k8s-openapi` 0.23.0) ([#938]). - BREAKING: Append a dot to the default cluster domain to make it a FQDN and allow FQDNs when validating a `DomainName` ([#939]). +[#931]: https://github.com/stackabletech/operator-rs/pull/931 +[#938]: https://github.com/stackabletech/operator-rs/pull/938 [#939]: https://github.com/stackabletech/operator-rs/pull/939 ## [0.83.0] - 2024-12-03 diff --git a/crates/stackable-operator/src/product_config_utils.rs b/crates/stackable-operator/src/product_config_utils.rs index 1d1a68c6..24095775 100644 --- a/crates/stackable-operator/src/product_config_utils.rs +++ b/crates/stackable-operator/src/product_config_utils.rs @@ -167,13 +167,21 @@ pub fn config_for_role_and_group<'a>( /// - `resource` - Not used directly. It's passed on to the `Configuration::compute_*` calls. /// - `roles` - A map keyed by role names. The value is a tuple of a vector of `PropertyNameKind` /// like (Cli, Env or Files) and [`crate::role_utils::Role`] with a boxed [`Configuration`]. -pub fn transform_all_roles_to_config( +#[allow(clippy::type_complexity)] +pub fn transform_all_roles_to_config( resource: &T::Configurable, - roles: HashMap, Role)>, + roles: HashMap< + String, + ( + Vec, + Role, + ), + >, ) -> Result where T: Configuration, U: Default + JsonSchema + Serialize, + ProductSpecificCommonConfig: Default + JsonSchema + Serialize, { let mut result = HashMap::new(); @@ -359,15 +367,16 @@ fn process_validation_result( /// - `role_name` - The name of the role. /// - `role` - The role for which to transform the configuration parameters. /// - `property_kinds` - Used as "buckets" to partition the configuration properties by. -fn transform_role_to_config( +fn transform_role_to_config( resource: &T::Configurable, role_name: &str, - role: &Role, + role: &Role, property_kinds: &[PropertyNameKind], ) -> Result where T: Configuration, U: Default + JsonSchema + Serialize, + ProductSpecificCommonConfig: Default + JsonSchema + Serialize, { let mut result = HashMap::new(); @@ -422,10 +431,10 @@ where /// - `role_name` - Not used directly but passed on to the `Configuration::compute_*` calls. /// - `config` - The configuration properties to partition. /// - `property_kinds` - The "buckets" used to partition the configuration properties. -fn parse_role_config( +fn parse_role_config( resource: &::Configurable, role_name: &str, - config: &CommonConfiguration, + config: &CommonConfiguration, property_kinds: &[PropertyNameKind], ) -> Result>>> where @@ -452,8 +461,8 @@ where Ok(result) } -fn parse_role_overrides( - config: &CommonConfiguration, +fn parse_role_overrides( + config: &CommonConfiguration, property_kinds: &[PropertyNameKind], ) -> Result>>> where @@ -489,8 +498,8 @@ where Ok(result) } -fn parse_file_overrides( - config: &CommonConfiguration, +fn parse_file_overrides( + config: &CommonConfiguration, file: &str, ) -> Result>> where @@ -522,7 +531,7 @@ mod tests { } use super::*; - use crate::role_utils::{Role, RoleGroup}; + use crate::role_utils::{GenericProductSpecificCommonConfig, Role, RoleGroup}; use k8s_openapi::api::core::v1::PodTemplateSpec; use rstest::*; use std::collections::HashMap; @@ -610,13 +619,14 @@ mod tests { config_overrides: Option>>, env_overrides: Option>, cli_overrides: Option>, - ) -> CommonConfiguration> { + ) -> CommonConfiguration, GenericProductSpecificCommonConfig> { CommonConfiguration { config: test_config.unwrap_or_default(), config_overrides: config_overrides.unwrap_or_default(), env_overrides: env_overrides.unwrap_or_default(), cli_overrides: cli_overrides.unwrap_or_default(), pod_overrides: PodTemplateSpec::default(), + product_specific_common_config: GenericProductSpecificCommonConfig::default(), } } diff --git a/crates/stackable-operator/src/role_utils.rs b/crates/stackable-operator/src/role_utils.rs index 04ecd259..e7767279 100644 --- a/crates/stackable-operator/src/role_utils.rs +++ b/crates/stackable-operator/src/role_utils.rs @@ -81,7 +81,7 @@ //! Each resource can have more operator specific labels. use std::{ - collections::{BTreeMap, HashMap}, + collections::{BTreeMap, HashMap, HashSet}, fmt::{Debug, Display}, }; @@ -97,20 +97,35 @@ use crate::{ use educe::Educe; use k8s_openapi::api::core::v1::PodTemplateSpec; use kube::{runtime::reflector::ObjectRef, Resource}; +use regex::Regex; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use snafu::{OptionExt, ResultExt, Snafu}; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("missing roleGroup {role_group:?}"))] + MissingRoleGroup { role_group: String }, + + #[snafu(display( + "Could not parse regex from \"jvmArgumentOverrides.removeRegex\", ignoring it (there might be some added anchors at the start and end): {regex:?}" + ))] + InvalidRemoveRegex { source: regex::Error, regex: String }, +} #[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] #[serde( rename_all = "camelCase", - bound(deserialize = "T: Default + Deserialize<'de>") + bound( + deserialize = "T: Default + Deserialize<'de>, ProductSpecificCommonConfig: Default + Deserialize<'de>" + ) )] -pub struct CommonConfiguration { +pub struct CommonConfiguration { #[serde(default)] // We can't depend on T being `Default`, since that trait is not object-safe // We only need to generate schemas for fully specified types, but schemars_derive // does not support specifying custom bounds. - #[schemars(default = "config_schema_default")] + #[schemars(default = "Self::default_config")] pub config: T, /// The `configOverrides` can be used to configure properties in product config files @@ -144,10 +159,122 @@ pub struct CommonConfiguration { #[serde(default)] #[schemars(schema_with = "raw_object_schema")] pub pod_overrides: PodTemplateSpec, + + // No docs needed, as we flatten this struct. + // + // This field is product-specific and can contain e.g. jvmArgumentOverrides. + // It is not accessible by operators, please use [`Role::get_product_specific_common_configs`] to read the values. + #[serde(flatten, default)] + pub(crate) product_specific_common_config: ProductSpecificCommonConfig, +} + +impl CommonConfiguration { + fn default_config() -> serde_json::Value { + serde_json::json!({}) + } +} + +#[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct GenericProductSpecificCommonConfig {} + +#[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct JavaCommonConfig { + /// Allows overriding JVM arguments. + // + /// Please read on the [JVM argument overrides documentation](DOCS_BASE_URL_PLACEHOLDER/concepts/overrides#jvm-argument-overrides) + /// for details on the usage. + #[serde(default)] + pub jvm_argument_overrides: JvmArgumentOverrides, +} + +#[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct JvmArgumentOverrides { + /// JVM arguments to be added + #[serde(default)] + add: Vec, + + /// JVM arguments to be removed by exact match + // + // HashSet to be optimized for quick lookup + #[serde(default)] + remove: HashSet, + + /// JVM arguments matching any of this regexes will be removed + #[serde(default)] + remove_regex: Vec, +} + +impl JvmArgumentOverrides { + pub fn new(add: Vec, remove: HashSet, remove_regex: Vec) -> Self { + Self { + add, + remove, + remove_regex, + } + } + + pub fn new_with_only_additions(add: Vec) -> Self { + Self { + add, + ..Default::default() + } + } + + /// Called on **merged** [`JvmArgumentOverrides`}, returns all arguments that should be passed to the JVM. + /// + /// **Can only be called on merged config, it will panic otherwise!** + /// + /// We are panicking (instead of returning an Error), because this is not the users fault, but + /// the operator is doing things wrong + pub fn effective_jvm_config_after_merging(&self) -> &Vec { + assert!( + self.remove.is_empty(), + "After merging there should be no removals left. \"effective_jvm_config_after_merging\" should only be called on merged configs!" + ); + assert!( + self.remove_regex.is_empty(), + "After merging there should be no removal regexes left. \"effective_jvm_config_after_merging\" should only be called on merged configs!" + ); + + &self.add + } } -fn config_schema_default() -> serde_json::Value { - serde_json::json!({}) +/// We can not use [`Merge`] here, as this function can fail, e.g. if an invalid regex is specified by the user +impl JvmArgumentOverrides { + /// Please watch out: Merge order is complicated for this merge. + /// Test your code! + pub fn try_merge(&mut self, defaults: &Self) -> Result<(), Error> { + let regexes = self + .remove_regex + .iter() + .map(|regex| { + let without_anchors = regex.trim_start_matches('^').trim_end_matches('$'); + let with_anchors = format!("^{without_anchors}$"); + + Regex::new(&with_anchors).with_context(|_| InvalidRemoveRegexSnafu { + regex: with_anchors, + }) + }) + .collect::, Error>>()?; + + let new_add = defaults + .add + .iter() + .filter(|arg| !self.remove.contains(*arg)) + .filter(|arg| !regexes.iter().any(|regex| regex.is_match(arg))) + .chain(self.add.iter()) + .cloned() + .collect(); + + self.add = new_add; + self.remove = HashSet::new(); + self.remove_regex = Vec::new(); + + Ok(()) + } } /// This struct represents a role - e.g. HDFS datanodes or Trino workers. It has a key-value-map containing @@ -168,33 +295,46 @@ fn config_schema_default() -> serde_json::Value { // However, product-operators can define their own - custom - struct and use that here. #[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] -pub struct Role -where +pub struct Role< + T, + U = GenericRoleConfig, + ProductSpecificCommonConfig = GenericProductSpecificCommonConfig, +> where // Don't remove this trait bounds!!! // We don't know why, but if you remove either of them, the generated default value in the CRDs will // be missing! U: Default + JsonSchema + Serialize, + ProductSpecificCommonConfig: Default + JsonSchema + Serialize, { - #[serde(flatten, bound(deserialize = "T: Default + Deserialize<'de>"))] - pub config: CommonConfiguration, + #[serde( + flatten, + bound( + deserialize = "T: Default + Deserialize<'de>, ProductSpecificCommonConfig: Deserialize<'de>" + ) + )] + pub config: CommonConfiguration, #[serde(default)] pub role_config: U, - pub role_groups: HashMap>, + pub role_groups: HashMap>, } -impl Role +impl Role where T: Configuration + 'static, U: Default + JsonSchema + Serialize, + ProductSpecificCommonConfig: Default + JsonSchema + Serialize + Clone + Merge, { /// This casts a generic struct implementing [`crate::product_config_utils::Configuration`] /// and used in [`Role`] into a Box of a dynamically dispatched /// [`crate::product_config_utils::Configuration`] Trait. This is required to use the generic /// [`Role`] with more than a single generic struct. For example different roles most likely /// have different structs implementing Configuration. - pub fn erase(self) -> Role>, U> { + pub fn erase( + self, + ) -> Role>, U, ProductSpecificCommonConfig> + { Role { config: CommonConfiguration { config: Box::new(self.config.config) @@ -203,6 +343,7 @@ where env_overrides: self.config.env_overrides, cli_overrides: self.config.cli_overrides, pod_overrides: self.config.pod_overrides, + product_specific_common_config: self.config.product_specific_common_config, }, role_config: self.role_config, role_groups: self @@ -219,6 +360,9 @@ where env_overrides: group.config.env_overrides, cli_overrides: group.config.cli_overrides, pod_overrides: group.config.pod_overrides, + product_specific_common_config: group + .config + .product_specific_common_config, }, replicas: group.replicas, }, @@ -229,6 +373,43 @@ where } } +impl Role +where + U: Default + JsonSchema + Serialize, +{ + /// Merges jvm argument overrides from + /// + /// 1. It takes the operator generated JVM args + /// 2. It applies role level overrides + /// 3. It applies roleGroup level overrides + pub fn get_merged_jvm_argument_overrides( + &self, + role_group: &str, + operator_generated: &JvmArgumentOverrides, + ) -> Result { + let from_role = &self + .config + .product_specific_common_config + .jvm_argument_overrides; + let from_role_group = &self + .role_groups + .get(role_group) + .with_context(|| MissingRoleGroupSnafu { role_group })? + .config + .product_specific_common_config + .jvm_argument_overrides; + + // Please note that the merge order is different than we normally do! + // This is not trivial, as the merge operation is not purely additive (as it is with e.g. `PodTemplateSpec). + let mut from_role = from_role.clone(); + from_role.try_merge(operator_generated)?; + let mut from_role_group = from_role_group.clone(); + from_role_group.try_merge(&from_role)?; + + Ok(from_role_group) + } +} + /// This is a product-agnostic RoleConfig, which is sufficient for most of the products. #[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] @@ -246,15 +427,17 @@ pub struct EmptyRoleConfig {} #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] #[serde( rename_all = "camelCase", - bound(deserialize = "T: Default + Deserialize<'de>") + bound( + deserialize = "T: Default + Deserialize<'de>, ProductSpecificCommonConfig: Default + Deserialize<'de>" + ) )] -pub struct RoleGroup { +pub struct RoleGroup { #[serde(flatten)] - pub config: CommonConfiguration, + pub config: CommonConfiguration, pub replicas: Option, } -impl RoleGroup { +impl RoleGroup { pub fn validate_config( &self, role: &Role, @@ -296,3 +479,118 @@ impl Display for RoleGroupRef { )) } } + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use crate::role_utils::JavaCommonConfig; + + use super::*; + + #[test] + fn test_merge_java_common_config() { + // The operator generates some JVM arguments + let operator_generated = JvmArgumentOverrides::new_with_only_additions( + [ + "-Xms34406m".to_owned(), + "-Xmx34406m".to_owned(), + "-XX:+UseG1GC".to_owned(), + "-XX:+ExitOnOutOfMemoryError".to_owned(), + "-Djava.protocol.handler.pkgs=sun.net.www.protocol".to_owned(), + "-Dsun.net.http.allowRestrictedHeaders=true".to_owned(), + "-Djava.security.properties=/stackable/nifi/conf/security.properties".to_owned(), + ] + .into(), + ); + + let entire_role: Role<(), GenericRoleConfig, JavaCommonConfig> = + serde_yaml::from_str(" + # Let's say we want to set some additional HTTP Proxy and IPv4 settings + # And we don't like the garbage collector for some reason... + jvmArgumentOverrides: + remove: + - -XX:+UseG1GC + add: # Add some networking arguments + - -Dhttps.proxyHost=proxy.my.corp + - -Dhttps.proxyPort=8080 + - -Djava.net.preferIPv4Stack=true + roleGroups: + default: + # For the roleGroup, let's say we need a different memory config. + # For that to work we first remove the flags generated by the operator and add our own. + # Also we override the proxy port to test that the roleGroup config takes precedence over the role config. + jvmArgumentOverrides: + removeRegex: + - -Xmx.* + - -Dhttps.proxyPort=.* + add: + - -Xmx40000m + - -Dhttps.proxyPort=1234 + ") + .expect("Failed to parse role"); + + let merged_jvm_argument_overrides = entire_role + .get_merged_jvm_argument_overrides("default", &operator_generated) + .expect("Failed to merge jvm argument overrides"); + + let expected = Vec::from([ + "-Xms34406m".to_owned(), + "-XX:+ExitOnOutOfMemoryError".to_owned(), + "-Djava.protocol.handler.pkgs=sun.net.www.protocol".to_owned(), + "-Dsun.net.http.allowRestrictedHeaders=true".to_owned(), + "-Djava.security.properties=/stackable/nifi/conf/security.properties".to_owned(), + "-Dhttps.proxyHost=proxy.my.corp".to_owned(), + "-Djava.net.preferIPv4Stack=true".to_owned(), + "-Xmx40000m".to_owned(), + "-Dhttps.proxyPort=1234".to_owned(), + ]); + + assert_eq!( + merged_jvm_argument_overrides, + JvmArgumentOverrides { + add: expected.clone(), + remove: HashSet::new(), + remove_regex: Vec::new() + } + ); + + assert_eq!( + merged_jvm_argument_overrides.effective_jvm_config_after_merging(), + &expected + ); + } + + #[test] + fn test_merge_java_common_config_keep_order() { + let operator_generated = + JvmArgumentOverrides::new_with_only_additions(["-Xms1m".to_owned()].into()); + + let entire_role: Role<(), GenericRoleConfig, JavaCommonConfig> = serde_yaml::from_str( + " + jvmArgumentOverrides: + add: + - -Xms2m + roleGroups: + default: + jvmArgumentOverrides: + add: + - -Xms3m + ", + ) + .expect("Failed to parse role"); + + let merged_jvm_argument_overrides = entire_role + .get_merged_jvm_argument_overrides("default", &operator_generated) + .expect("Failed to merge jvm argument overrides"); + + assert_eq!( + merged_jvm_argument_overrides.effective_jvm_config_after_merging(), + &[ + "-Xms1m".to_owned(), + "-Xms2m".to_owned(), + "-Xms3m".to_owned() + ] + ); + } +}