diff --git a/example_config.yaml b/example_config.yaml index df56444..cdeaa61 100644 --- a/example_config.yaml +++ b/example_config.yaml @@ -17,3 +17,8 @@ schema: - name: resource type: !dynamic_object expand_dots: true + - name: attributes + type: !static_object + fields: + - name: class + type: !text diff --git a/src/commands/create.rs b/src/commands/create.rs index 29add26..f87b922 100644 --- a/src/commands/create.rs +++ b/src/commands/create.rs @@ -1,11 +1,23 @@ -use color_eyre::Result; +use color_eyre::{eyre::bail, Result}; use sqlx::{query, PgPool}; -use crate::{args::CreateArgs, config::IndexConfig}; +use crate::{ + args::CreateArgs, + config::{FieldType, IndexConfig}, +}; pub async fn run_create(args: CreateArgs, pool: PgPool) -> Result<()> { let config = IndexConfig::from_path(&args.config_path).await?; + let array_static_object_exists = config + .schema + .fields + .iter() + .any(|x| x.array && matches!(x.type_, FieldType::StaticObject(_))); + if array_static_object_exists { + bail!("array of static objects are currently unsupported"); + } + query("INSERT INTO indexes (name, config) VALUES ($1, $2)") .bind(&config.name) .bind(&serde_json::to_value(&config)?) diff --git a/src/commands/field_parser.rs b/src/commands/field_parser.rs index 151beaa..a7de697 100644 --- a/src/commands/field_parser.rs +++ b/src/commands/field_parser.rs @@ -4,14 +4,29 @@ use tantivy::{ TantivyDocument, }; -use crate::config::{number::NumberFieldType, FieldConfig, FieldType}; +use crate::config::{ + escaped_with_parent_name, number::NumberFieldType, FieldConfig, FieldConfigs, FieldType, +}; type ParseFn = Box Result>; +enum FieldParserVariation { + Value { field: Field, parse_fn: ParseFn }, + Object(Vec), +} + pub struct FieldParser { + /// The field name. Example: "world". pub name: String, - field: Field, - parse_fn: ParseFn, + + /// The tantivy name flattened and escaped. Example: "hello.world". + /// Only used for a debug log. + full_name: String, + + /// Whether the field is a tantivy field or an object of parsers. + variation: FieldParserVariation, + + /// Whether the field is an array. is_array: bool, } @@ -23,14 +38,31 @@ impl FieldParser { doc: &mut TantivyDocument, json_value: serde_json::Value, ) -> Result<()> { - if self.is_array { - let values: Vec = serde_json::from_value(json_value)?; - for value in values { - doc.add_field_value(self.field, (self.parse_fn)(value)?); + match &self.variation { + FieldParserVariation::Value { field, parse_fn } => { + if self.is_array { + let values: Vec = serde_json::from_value(json_value)?; + for value in values { + doc.add_field_value(*field, parse_fn(value)?); + } + } else { + let value = parse_fn(json_value)?; + doc.add_field_value(*field, value); + } + } + FieldParserVariation::Object(parsers) => { + let mut json_obj: serde_json::Map = + serde_json::from_value(json_value)?; + + for parser in parsers { + let Some(json_value) = json_obj.remove(parser.name.as_str()) else { + debug!("field '{}' in schema but not found", &parser.full_name); + continue; + }; + + parser.add_parsed_field_value(doc, json_value)?; + } } - } else { - let value = (self.parse_fn)(json_value)?; - doc.add_field_value(self.field, value); } Ok(()) @@ -43,22 +75,21 @@ fn common_parse(value: serde_json::Value) -> Result { fn build_parser_from_field_config( config: FieldConfig, + full_name: String, schema_builder: &mut SchemaBuilder, ) -> Result { - let name = config.name; - let (field, parse_fn): (Field, ParseFn) = match config.type_ { FieldType::Text(options) => { - let field = schema_builder.add_text_field(&name, options); + let field = schema_builder.add_text_field(&full_name, options); (field, Box::new(common_parse)) } FieldType::Number(options) => { let field_type = options.type_.clone(); let parse_string = options.parse_string; let field = match field_type { - NumberFieldType::U64 => schema_builder.add_u64_field(&name, options), - NumberFieldType::I64 => schema_builder.add_i64_field(&name, options), - NumberFieldType::F64 => schema_builder.add_f64_field(&name, options), + NumberFieldType::U64 => schema_builder.add_u64_field(&full_name, options), + NumberFieldType::I64 => schema_builder.add_i64_field(&full_name, options), + NumberFieldType::F64 => schema_builder.add_f64_field(&full_name, options), }; ( @@ -82,7 +113,7 @@ fn build_parser_from_field_config( } FieldType::Boolean(options) => { let parse_string = options.parse_string; - let field = schema_builder.add_bool_field(&name, options); + let field = schema_builder.add_bool_field(&full_name, options); ( field, Box::new(move |value| { @@ -108,36 +139,60 @@ fn build_parser_from_field_config( ) } FieldType::Datetime(options) => { - let field = schema_builder.add_date_field(&name, options.clone()); + let field = schema_builder.add_date_field(&full_name, options.clone()); ( field, Box::new(move |value| options.formats.try_parse(value)), ) } FieldType::Ip(options) => { - let field = schema_builder.add_ip_addr_field(&name, options); + let field = schema_builder.add_ip_addr_field(&full_name, options); (field, Box::new(common_parse)) } FieldType::DynamicObject(options) => { - let field = schema_builder.add_json_field(&name, options); + let field = schema_builder.add_json_field(&full_name, options); (field, Box::new(common_parse)) } + FieldType::StaticObject(options) => { + let parsers = build_parsers_from_field_configs_inner( + options.fields, + schema_builder, + Some(full_name.clone()), + )?; + return Ok(FieldParser { + name: config.name, + full_name, + variation: FieldParserVariation::Object(parsers), + is_array: config.array, + }); + } }; Ok(FieldParser { - name, - field, - parse_fn, + name: config.name, + full_name, + variation: FieldParserVariation::Value { field, parse_fn }, is_array: config.array, }) } -pub fn build_parsers_from_fields_config( - fields: Vec, +fn build_parsers_from_field_configs_inner( + fields: FieldConfigs, schema_builder: &mut SchemaBuilder, + parent_name: Option, ) -> Result> { fields .into_iter() - .map(|field| build_parser_from_field_config(field, schema_builder)) + .map(|field| { + let name = escaped_with_parent_name(&field.name, parent_name.as_deref()); + build_parser_from_field_config(field, name, schema_builder) + }) .collect::>>() } + +pub fn build_parsers_from_field_configs( + fields: FieldConfigs, + schema_builder: &mut SchemaBuilder, +) -> Result> { + build_parsers_from_field_configs_inner(fields, schema_builder, None) +} diff --git a/src/commands/index.rs b/src/commands/index.rs index 166e364..f8b0bbf 100644 --- a/src/commands/index.rs +++ b/src/commands/index.rs @@ -10,7 +10,7 @@ use tokio::{ task::spawn_blocking, }; -use crate::{args::IndexArgs, commands::field_parser::build_parsers_from_fields_config}; +use crate::{args::IndexArgs, commands::field_parser::build_parsers_from_field_configs}; use super::{dynamic_field_config, get_index_config, write_unified_index, DYNAMIC_FIELD_NAME}; @@ -20,7 +20,7 @@ pub async fn run_index(args: IndexArgs, pool: PgPool) -> Result<()> { let mut schema_builder = Schema::builder(); let dynamic_field = schema_builder.add_json_field(DYNAMIC_FIELD_NAME, dynamic_field_config()); let field_parsers = - build_parsers_from_fields_config(config.schema.fields, &mut schema_builder)?; + build_parsers_from_field_configs(config.schema.fields, &mut schema_builder)?; let schema = schema_builder.build(); diff --git a/src/commands/search.rs b/src/commands/search.rs index 46f12ea..27bc0eb 100644 --- a/src/commands/search.rs +++ b/src/commands/search.rs @@ -17,7 +17,7 @@ use tokio::{ use crate::{ args::SearchArgs, - config::{FieldConfig, FieldType}, + config::{split_object_field_name, unescaped_field_name, FieldConfig, FieldType}, }; use super::{dynamic_field_config, get_index_config, open_unified_directories, DYNAMIC_FIELD_NAME}; @@ -44,20 +44,49 @@ fn get_prettified_json( continue; }; - if field.name != DYNAMIC_FIELD_NAME { + if field.name == DYNAMIC_FIELD_NAME { + let OwnedValue::Object(object) = value else { + return Err(eyre!( + "expected {} field to be an object", + DYNAMIC_FIELD_NAME + )); + }; + + for (k, v) in object { + prettified_field_map.insert(k, v); + } + + continue; + } + + let names = split_object_field_name(&field.name) + .into_iter() + .map(unescaped_field_name) + .collect::>(); + if names.len() <= 1 { prettified_field_map.insert(field.name.clone(), value); continue; } - let OwnedValue::Object(object) = value else { - return Err(eyre!( - "expected {} field to be an object", - DYNAMIC_FIELD_NAME - )); - }; + // Prettify static object with inner fields like {"hello.world": 1} + // to look like: {"hello": {"world": 1}}. + + let mut inner_map = prettified_field_map + .entry(names[0].to_string()) + .or_insert(OwnedValue::Object(BTreeMap::new())); + + for name in &names[1..names.len() - 1] { + let OwnedValue::Object(map) = inner_map else { + panic!("invalid state, every map is an object"); + }; + + inner_map = map + .entry(name.to_string()) + .or_insert(OwnedValue::Object(BTreeMap::new())); + } - for (k, v) in object { - prettified_field_map.insert(k, v); + if let OwnedValue::Object(map) = inner_map { + map.insert(names[names.len() - 1].to_string(), value); } } @@ -103,7 +132,7 @@ pub async fn run_search(args: SearchArgs, pool: PgPool) -> Result<()> { let config = get_index_config(&args.name, &pool).await?; let indexed_field_names = { - let mut fields = config.schema.get_indexed_fields(); + let mut fields = config.schema.fields.get_indexed(); fields.push(FieldConfig { name: DYNAMIC_FIELD_NAME.to_string(), array: false, diff --git a/src/config/mod.rs b/src/config/mod.rs index 7c91f20..e41add4 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3,9 +3,10 @@ pub mod datetime; pub mod dynamic_object; pub mod ip; pub mod number; +pub mod static_object; pub mod text; -use std::path::Path; +use std::{ops::Deref, path::Path, vec::IntoIter}; use color_eyre::eyre::Result; use serde::{Deserialize, Serialize}; @@ -19,6 +20,7 @@ use self::{ dynamic_object::DynamicObjectFieldConfig, ip::IpFieldConfig, number::NumberFieldConfig, + static_object::StaticObjectFieldConfig, text::{IndexedTextFieldType, TextFieldConfig}, }; @@ -96,6 +98,7 @@ pub enum FieldType { Datetime(DateTimeFieldConfig), Ip(IpFieldConfig), DynamicObject(DynamicObjectFieldConfig), + StaticObject(StaticObjectFieldConfig), } impl FieldType { @@ -110,6 +113,7 @@ impl FieldType { DynamicObject(config) => { !matches!(config.indexed, IndexedDynamicObjectFieldType::False) } + StaticObject(_) => false, } } } @@ -125,26 +129,108 @@ pub struct FieldConfig { pub type_: FieldType, } +pub fn split_object_field_name(s: &str) -> Vec<&str> { + let mut result = Vec::new(); + let mut start = 0; + + for (i, c) in s.char_indices().peekable() { + if c == '.' && (i == 0 || (i > 0 && &s[i - 1..i] != "\\")) { + result.push(&s[start..i]); + start = i + 1; + } + } + + result.push(&s[start..]); + result +} + +fn escaped_field_name(name: &str) -> String { + name.replace('.', "\\.") +} + +pub fn unescaped_field_name(name: &str) -> String { + name.replace("\\.", ".") +} + +pub fn escaped_with_parent_name(name: &str, parent_name: Option<&str>) -> String { + let escaped = escaped_field_name(name); + if let Some(parent_name) = parent_name { + format!("{}.{}", parent_name, escaped) + } else { + escaped + } +} + +impl FieldConfig { + fn with_parent_name(self, parent_name: Option<&str>) -> Self { + Self { + name: escaped_with_parent_name(&self.name, parent_name), + array: self.array, + type_: self.type_, + } + } +} + #[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct IndexSchema { - #[serde(default)] - pub fields: Vec, +pub struct FieldConfigs(Vec); - #[serde(default)] - #[serde(skip_serializing_if = "Option::is_none")] - time_field: Option, +impl IntoIterator for FieldConfigs { + type Item = FieldConfig; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl Deref for FieldConfigs { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.0 + } } -impl IndexSchema { - pub fn get_indexed_fields(&self) -> Vec { - self.fields +impl FieldConfigs { + fn get_indexed_inner(&self, parent_name: Option) -> Vec { + let mut indexed_fields = self .iter() - .filter(|x| x.type_.is_indexed()) + .filter(|field| field.type_.is_indexed()) .cloned() - .collect() + .map(|field| field.with_parent_name(parent_name.as_deref())) + .collect::>(); + + indexed_fields.extend( + self.iter() + .filter_map(|field| { + if let FieldType::StaticObject(config) = &field.type_ { + let name = escaped_with_parent_name(&field.name, parent_name.as_deref()); + Some(config.fields.get_indexed_inner(Some(name))) + } else { + None + } + }) + .flatten(), + ); + + indexed_fields + } + + pub fn get_indexed(&self) -> Vec { + self.get_indexed_inner(None) } } +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct IndexSchema { + #[serde(default)] + pub fields: FieldConfigs, + + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + time_field: Option, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) struct IndexConfig { pub name: String, diff --git a/src/config/static_object.rs b/src/config/static_object.rs new file mode 100644 index 0000000..7aa0705 --- /dev/null +++ b/src/config/static_object.rs @@ -0,0 +1,9 @@ +use serde::{Deserialize, Serialize}; + +use super::FieldConfigs; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StaticObjectFieldConfig { + #[serde(default)] + pub fields: FieldConfigs, +}