Skip to content

Commit

Permalink
Add static object fields
Browse files Browse the repository at this point in the history
  • Loading branch information
tontinton committed Jun 7, 2024
1 parent b5e79b9 commit 3116da9
Show file tree
Hide file tree
Showing 7 changed files with 249 additions and 53 deletions.
5 changes: 5 additions & 0 deletions example_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ schema:
- name: resource
type: !dynamic_object
expand_dots: true
- name: attributes
type: !static_object
fields:
- name: class
type: !text
16 changes: 14 additions & 2 deletions src/commands/create.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
use color_eyre::Result;
use color_eyre::{eyre::bail, Result};
use sqlx::{query, PgPool};

use crate::{args::CreateArgs, config::IndexConfig};
use crate::{
args::CreateArgs,
config::{FieldType, IndexConfig},
};

pub async fn run_create(args: CreateArgs, pool: PgPool) -> Result<()> {
let config = IndexConfig::from_path(&args.config_path).await?;

let array_static_object_exists = config
.schema
.fields
.iter()
.any(|x| x.array && matches!(x.type_, FieldType::StaticObject(_)));
if array_static_object_exists {
bail!("array of static objects are currently unsupported");
}

query("INSERT INTO indexes (name, config) VALUES ($1, $2)")
.bind(&config.name)
.bind(&serde_json::to_value(&config)?)
Expand Down
107 changes: 81 additions & 26 deletions src/commands/field_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,29 @@ use tantivy::{
TantivyDocument,
};

use crate::config::{number::NumberFieldType, FieldConfig, FieldType};
use crate::config::{
escaped_with_parent_name, number::NumberFieldType, FieldConfig, FieldConfigs, FieldType,
};

type ParseFn = Box<dyn Fn(serde_json::Value) -> Result<OwnedValue>>;

/// How a [`FieldParser`] turns its JSON value into document content.
enum FieldParserVariation {
/// A leaf field: `parse_fn` converts the JSON value into an `OwnedValue`,
/// which is then added to the document under `field`.
Value { field: Field, parse_fn: ParseFn },
/// An object with a fixed set of children: the JSON value is deserialized
/// into a map and each child parser consumes the entry whose key equals
/// its own `name`.
Object(Vec<FieldParser>),
}

/// Parses the JSON value of a single schema field and adds the result to a
/// `TantivyDocument` (see `add_parsed_field_value`).
pub struct FieldParser {
/// The field name. Example: "world".
pub name: String,
field: Field,
parse_fn: ParseFn,

/// The tantivy name flattened and escaped. Example: "hello.world".
/// Only used for a debug log.
full_name: String,

/// Whether the field is a tantivy field or an object of parsers.
variation: FieldParserVariation,

/// Whether the field is an array.
/// Only read when `variation` is `Value`; the `Object` branch of
/// `add_parsed_field_value` does not consult it.
is_array: bool,
}

Expand All @@ -23,14 +38,31 @@ impl FieldParser {
doc: &mut TantivyDocument,
json_value: serde_json::Value,
) -> Result<()> {
if self.is_array {
let values: Vec<serde_json::Value> = serde_json::from_value(json_value)?;
for value in values {
doc.add_field_value(self.field, (self.parse_fn)(value)?);
match &self.variation {
FieldParserVariation::Value { field, parse_fn } => {
if self.is_array {
let values: Vec<serde_json::Value> = serde_json::from_value(json_value)?;
for value in values {
doc.add_field_value(*field, parse_fn(value)?);
}
} else {
let value = parse_fn(json_value)?;
doc.add_field_value(*field, value);
}
}
FieldParserVariation::Object(parsers) => {
let mut json_obj: serde_json::Map<String, serde_json::Value> =
serde_json::from_value(json_value)?;

for parser in parsers {
let Some(json_value) = json_obj.remove(parser.name.as_str()) else {
debug!("field '{}' in schema but not found", &parser.full_name);
continue;
};

parser.add_parsed_field_value(doc, json_value)?;
}
}
} else {
let value = (self.parse_fn)(json_value)?;
doc.add_field_value(self.field, value);
}

Ok(())
Expand All @@ -43,22 +75,21 @@ fn common_parse(value: serde_json::Value) -> Result<OwnedValue> {

fn build_parser_from_field_config(
config: FieldConfig,
full_name: String,
schema_builder: &mut SchemaBuilder,
) -> Result<FieldParser> {
let name = config.name;

let (field, parse_fn): (Field, ParseFn) = match config.type_ {
FieldType::Text(options) => {
let field = schema_builder.add_text_field(&name, options);
let field = schema_builder.add_text_field(&full_name, options);
(field, Box::new(common_parse))
}
FieldType::Number(options) => {
let field_type = options.type_.clone();
let parse_string = options.parse_string;
let field = match field_type {
NumberFieldType::U64 => schema_builder.add_u64_field(&name, options),
NumberFieldType::I64 => schema_builder.add_i64_field(&name, options),
NumberFieldType::F64 => schema_builder.add_f64_field(&name, options),
NumberFieldType::U64 => schema_builder.add_u64_field(&full_name, options),
NumberFieldType::I64 => schema_builder.add_i64_field(&full_name, options),
NumberFieldType::F64 => schema_builder.add_f64_field(&full_name, options),
};

(
Expand All @@ -82,7 +113,7 @@ fn build_parser_from_field_config(
}
FieldType::Boolean(options) => {
let parse_string = options.parse_string;
let field = schema_builder.add_bool_field(&name, options);
let field = schema_builder.add_bool_field(&full_name, options);
(
field,
Box::new(move |value| {
Expand All @@ -108,36 +139,60 @@ fn build_parser_from_field_config(
)
}
FieldType::Datetime(options) => {
let field = schema_builder.add_date_field(&name, options.clone());
let field = schema_builder.add_date_field(&full_name, options.clone());
(
field,
Box::new(move |value| options.formats.try_parse(value)),
)
}
FieldType::Ip(options) => {
let field = schema_builder.add_ip_addr_field(&name, options);
let field = schema_builder.add_ip_addr_field(&full_name, options);
(field, Box::new(common_parse))
}
FieldType::DynamicObject(options) => {
let field = schema_builder.add_json_field(&name, options);
let field = schema_builder.add_json_field(&full_name, options);
(field, Box::new(common_parse))
}
FieldType::StaticObject(options) => {
let parsers = build_parsers_from_field_configs_inner(
options.fields,
schema_builder,
Some(full_name.clone()),
)?;
return Ok(FieldParser {
name: config.name,
full_name,
variation: FieldParserVariation::Object(parsers),
is_array: config.array,
});
}
};

Ok(FieldParser {
name,
field,
parse_fn,
name: config.name,
full_name,
variation: FieldParserVariation::Value { field, parse_fn },
is_array: config.array,
})
}

pub fn build_parsers_from_fields_config(
fields: Vec<FieldConfig>,
fn build_parsers_from_field_configs_inner(
fields: FieldConfigs,
schema_builder: &mut SchemaBuilder,
parent_name: Option<String>,
) -> Result<Vec<FieldParser>> {
fields
.into_iter()
.map(|field| build_parser_from_field_config(field, schema_builder))
.map(|field| {
let name = escaped_with_parent_name(&field.name, parent_name.as_deref());
build_parser_from_field_config(field, name, schema_builder)
})
.collect::<Result<Vec<_>>>()
}

/// Builds one [`FieldParser`] per entry in `fields`, adding the corresponding
/// fields to `schema_builder` along the way.
///
/// This is the public entry point of the recursive builder: it delegates to
/// `build_parsers_from_field_configs_inner` with `None` as the parent name,
/// i.e. `fields` are treated as the top level of the schema.
///
/// # Errors
///
/// Propagates any error returned by the inner builder.
pub fn build_parsers_from_field_configs(
fields: FieldConfigs,
schema_builder: &mut SchemaBuilder,
) -> Result<Vec<FieldParser>> {
build_parsers_from_field_configs_inner(fields, schema_builder, None)
}
4 changes: 2 additions & 2 deletions src/commands/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use tokio::{
task::spawn_blocking,
};

use crate::{args::IndexArgs, commands::field_parser::build_parsers_from_fields_config};
use crate::{args::IndexArgs, commands::field_parser::build_parsers_from_field_configs};

use super::{dynamic_field_config, get_index_config, write_unified_index, DYNAMIC_FIELD_NAME};

Expand All @@ -20,7 +20,7 @@ pub async fn run_index(args: IndexArgs, pool: PgPool) -> Result<()> {
let mut schema_builder = Schema::builder();
let dynamic_field = schema_builder.add_json_field(DYNAMIC_FIELD_NAME, dynamic_field_config());
let field_parsers =
build_parsers_from_fields_config(config.schema.fields, &mut schema_builder)?;
build_parsers_from_field_configs(config.schema.fields, &mut schema_builder)?;

let schema = schema_builder.build();

Expand Down
51 changes: 40 additions & 11 deletions src/commands/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use tokio::{

use crate::{
args::SearchArgs,
config::{FieldConfig, FieldType},
config::{split_object_field_name, unescaped_field_name, FieldConfig, FieldType},
};

use super::{dynamic_field_config, get_index_config, open_unified_directories, DYNAMIC_FIELD_NAME};
Expand All @@ -44,20 +44,49 @@ fn get_prettified_json(
continue;
};

if field.name != DYNAMIC_FIELD_NAME {
if field.name == DYNAMIC_FIELD_NAME {
let OwnedValue::Object(object) = value else {
return Err(eyre!(
"expected {} field to be an object",
DYNAMIC_FIELD_NAME
));
};

for (k, v) in object {
prettified_field_map.insert(k, v);
}

continue;
}

let names = split_object_field_name(&field.name)
.into_iter()
.map(unescaped_field_name)
.collect::<Vec<_>>();
if names.len() <= 1 {
prettified_field_map.insert(field.name.clone(), value);
continue;
}

let OwnedValue::Object(object) = value else {
return Err(eyre!(
"expected {} field to be an object",
DYNAMIC_FIELD_NAME
));
};
// Prettify static object with inner fields like {"hello.world": 1}
// to look like: {"hello": {"world": 1}}.

let mut inner_map = prettified_field_map
.entry(names[0].to_string())
.or_insert(OwnedValue::Object(BTreeMap::new()));

for name in &names[1..names.len() - 1] {
let OwnedValue::Object(map) = inner_map else {
panic!("invalid state, every map is an object");
};

inner_map = map
.entry(name.to_string())
.or_insert(OwnedValue::Object(BTreeMap::new()));
}

for (k, v) in object {
prettified_field_map.insert(k, v);
if let OwnedValue::Object(map) = inner_map {
map.insert(names[names.len() - 1].to_string(), value);
}
}

Expand Down Expand Up @@ -103,7 +132,7 @@ pub async fn run_search(args: SearchArgs, pool: PgPool) -> Result<()> {
let config = get_index_config(&args.name, &pool).await?;

let indexed_field_names = {
let mut fields = config.schema.get_indexed_fields();
let mut fields = config.schema.fields.get_indexed();
fields.push(FieldConfig {
name: DYNAMIC_FIELD_NAME.to_string(),
array: false,
Expand Down
Loading

0 comments on commit 3116da9

Please sign in to comment.