Skip to content

Commit

Permalink
perf: improve perf of encoding with new UTF-8 validation
Browse files Browse the repository at this point in the history
This makes four changes:

1. The `EscapingScheme` and `ValidationScheme` enums are now `Copy`
   since they are very small and cheap to copy. They're passed by
   value rather than by reference.
2. The `escape_name` function now returns a `Cow` rather than a
   `String` to avoid allocations in many cases.
3. `escape_name` also preallocates a buffer for the escaped name
   rather than starting with an empty `String` and growing it, to
   amortize the allocations.
4. Use `is_ascii_alphabetic` and `is_ascii_digit` to check for
   characters that are valid in metric and label names.

Based on profiles I suspect that #2 has the highest impact but haven't
split these out to see how much of a difference it makes.
  • Loading branch information
sd2k authored and fedetorres93 committed Jan 22, 2025
1 parent ca92109 commit f11d11c
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 82 deletions.
49 changes: 24 additions & 25 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ impl ExemplarValueEncoder<'_> {

/// Enum for determining how metric and label names will
/// be validated.
#[derive(Debug, PartialEq, Default, Clone)]
#[derive(Debug, PartialEq, Default, Clone, Copy)]
pub enum ValidationScheme {
/// Setting that requires that metric and label names
/// conform to the original OpenMetrics character requirements.
Expand Down Expand Up @@ -807,9 +807,9 @@ fn is_valid_legacy_prefix(prefix: Option<&Prefix>) -> bool {
fn is_quoted_metric_name(
name: &str,
prefix: Option<&Prefix>,
validation_scheme: &ValidationScheme,
validation_scheme: ValidationScheme,
) -> bool {
*validation_scheme == ValidationScheme::UTF8Validation
validation_scheme == ValidationScheme::UTF8Validation
&& (!is_valid_legacy_metric_name(name) || !is_valid_legacy_prefix(prefix))
}

Expand All @@ -818,24 +818,20 @@ fn is_valid_legacy_label_name(label_name: &str) -> bool {
return false;
}
for (i, b) in label_name.chars().enumerate() {
if !((b >= 'a' && b <= 'z')
|| (b >= 'A' && b <= 'Z')
|| b == '_'
|| (b >= '0' && b <= '9' && i > 0))
{
if !(b.is_ascii_alphabetic() || b == '_' || (b.is_ascii_digit() && i > 0)) {
return false;
}
}
true
}

fn is_quoted_label_name(name: &str, validation_scheme: &ValidationScheme) -> bool {
*validation_scheme == ValidationScheme::UTF8Validation && !is_valid_legacy_label_name(name)
fn is_quoted_label_name(name: &str, validation_scheme: ValidationScheme) -> bool {
validation_scheme == ValidationScheme::UTF8Validation && !is_valid_legacy_label_name(name)
}

/// Enum for determining how metric and label names will
/// be escaped.
#[derive(Debug, Default, Clone)]
#[derive(Debug, Default, Clone, Copy)]
pub enum EscapingScheme {
/// Replaces all legacy-invalid characters with underscores.
#[default]
Expand Down Expand Up @@ -863,26 +859,30 @@ impl EscapingScheme {
}
}

fn escape_name(name: &str, scheme: &EscapingScheme) -> String {
fn escape_name(name: &str, scheme: EscapingScheme) -> Cow<'_, str> {
if name.is_empty() {
return name.to_string();
return name.into();
}
let mut escaped = String::new();
match scheme {
EscapingScheme::NoEscaping => return name.to_string(),
EscapingScheme::NoEscaping => name.into(),
EscapingScheme::UnderscoreEscaping | EscapingScheme::ValueEncodingEscaping
if is_valid_legacy_metric_name(name) =>
{
name.into()
}
EscapingScheme::UnderscoreEscaping => {
if is_valid_legacy_metric_name(name) {
return name.to_string();
}
let mut escaped = String::with_capacity(name.len());
for (i, b) in name.chars().enumerate() {
if is_valid_legacy_char(b, i) {
escaped.push(b);
} else {
escaped.push('_');
}
}
escaped.into()
}
EscapingScheme::DotsEscaping => {
let mut escaped = String::with_capacity(name.len());
for (i, b) in name.chars().enumerate() {
if b == '_' {
escaped.push_str("__");
Expand All @@ -894,11 +894,10 @@ fn escape_name(name: &str, scheme: &EscapingScheme) -> String {
escaped.push_str("__");
}
}
escaped.into()
}
EscapingScheme::ValueEncodingEscaping => {
if is_valid_legacy_metric_name(name) {
return name.to_string();
}
let mut escaped = String::with_capacity(name.len());
escaped.push_str("U__");
for (i, b) in name.chars().enumerate() {
if b == '_' {
Expand All @@ -909,9 +908,9 @@ fn escape_name(name: &str, scheme: &EscapingScheme) -> String {
write!(escaped, "_{:x}_", b as i64).unwrap();
}
}
escaped.into()
}
}
escaped
}

/// Returns the escaping scheme to use based on the given header.
Expand Down Expand Up @@ -1120,13 +1119,13 @@ mod tests {
];

for scenario in scenarios {
let result = escape_name(scenario.input, &EscapingScheme::UnderscoreEscaping);
let result = escape_name(scenario.input, EscapingScheme::UnderscoreEscaping);
assert_eq!(result, scenario.expected_underscores, "{}", scenario.name);

let result = escape_name(scenario.input, &EscapingScheme::DotsEscaping);
let result = escape_name(scenario.input, EscapingScheme::DotsEscaping);
assert_eq!(result, scenario.expected_dots, "{}", scenario.name);

let result = escape_name(scenario.input, &EscapingScheme::ValueEncodingEscaping);
let result = escape_name(scenario.input, EscapingScheme::ValueEncodingEscaping);
assert_eq!(result, scenario.expected_value, "{}", scenario.name);
}
}
Expand Down
105 changes: 50 additions & 55 deletions src/encoding/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ where
registry.encode(
&mut DescriptorEncoder::new(
writer,
&registry.name_validation_scheme(),
&registry.escaping_scheme(),
registry.name_validation_scheme(),
registry.escaping_scheme(),
)
.into(),
)
Expand Down Expand Up @@ -196,8 +196,8 @@ pub(crate) struct DescriptorEncoder<'a> {
writer: &'a mut dyn Write,
prefix: Option<&'a Prefix>,
labels: &'a [(Cow<'static, str>, Cow<'static, str>)],
name_validation_scheme: &'a ValidationScheme,
escaping_scheme: &'a EscapingScheme,
name_validation_scheme: ValidationScheme,
escaping_scheme: EscapingScheme,
}

impl std::fmt::Debug for DescriptorEncoder<'_> {
Expand All @@ -207,11 +207,11 @@ impl std::fmt::Debug for DescriptorEncoder<'_> {
}

impl DescriptorEncoder<'_> {
pub(crate) fn new<'a>(
writer: &'a mut dyn Write,
name_validation_scheme: &'a ValidationScheme,
escaping_scheme: &'a EscapingScheme,
) -> DescriptorEncoder<'a> {
pub(crate) fn new(
writer: &'_ mut dyn Write,
name_validation_scheme: ValidationScheme,
escaping_scheme: EscapingScheme,
) -> DescriptorEncoder<'_> {
DescriptorEncoder {
writer,
prefix: Default::default(),
Expand All @@ -230,8 +230,8 @@ impl DescriptorEncoder<'_> {
prefix,
labels,
writer: self.writer,
name_validation_scheme: &self.name_validation_scheme,
escaping_scheme: &self.escaping_scheme,
name_validation_scheme: self.name_validation_scheme,
escaping_scheme: self.escaping_scheme,
}
}

Expand All @@ -246,14 +246,12 @@ impl DescriptorEncoder<'_> {
let mut escaped_prefix: Option<&Prefix> = None;
let escaped_prefix_value: Prefix;
if let Some(prefix) = self.prefix {
escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme));
escaped_prefix_value =
Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned());
escaped_prefix = Some(&escaped_prefix_value);
}
let is_quoted_metric_name = is_quoted_metric_name(
escaped_name.as_str(),
escaped_prefix,
self.name_validation_scheme,
);
let is_quoted_metric_name =
is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme);
self.writer.write_str("# HELP ")?;
if is_quoted_metric_name {
self.writer.write_str("\"")?;
Expand All @@ -262,7 +260,7 @@ impl DescriptorEncoder<'_> {
self.writer.write_str(prefix.as_str())?;
self.writer.write_str("_")?;
}
self.writer.write_str(escaped_name.as_str())?;
self.writer.write_str(&escaped_name)?;
if let Some(unit) = unit {
self.writer.write_str("_")?;
self.writer.write_str(unit.as_str())?;
Expand All @@ -282,7 +280,7 @@ impl DescriptorEncoder<'_> {
self.writer.write_str(prefix.as_str())?;
self.writer.write_str("_")?;
}
self.writer.write_str(escaped_name.as_str())?;
self.writer.write_str(&escaped_name)?;
if let Some(unit) = unit {
self.writer.write_str("_")?;
self.writer.write_str(unit.as_str())?;
Expand All @@ -303,7 +301,7 @@ impl DescriptorEncoder<'_> {
self.writer.write_str(prefix.as_str())?;
self.writer.write_str("_")?;
}
self.writer.write_str(escaped_name.as_str())?;
self.writer.write_str(&escaped_name)?;
self.writer.write_str("_")?;
self.writer.write_str(unit.as_str())?;
if is_quoted_metric_name {
Expand Down Expand Up @@ -343,8 +341,8 @@ pub(crate) struct MetricEncoder<'a> {
unit: Option<&'a Unit>,
const_labels: &'a [(Cow<'static, str>, Cow<'static, str>)],
family_labels: Option<&'a dyn super::EncodeLabelSet>,
name_validation_scheme: &'a ValidationScheme,
escaping_scheme: &'a EscapingScheme,
name_validation_scheme: ValidationScheme,
escaping_scheme: EscapingScheme,
}

impl std::fmt::Debug for MetricEncoder<'_> {
Expand Down Expand Up @@ -534,14 +532,12 @@ impl MetricEncoder<'_> {
let mut escaped_prefix: Option<&Prefix> = None;
let escaped_prefix_value: Prefix;
if let Some(prefix) = self.prefix {
escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme));
escaped_prefix_value =
Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned());
escaped_prefix = Some(&escaped_prefix_value);
}
let is_quoted_metric_name = is_quoted_metric_name(
escaped_name.as_str(),
escaped_prefix,
self.name_validation_scheme,
);
let is_quoted_metric_name =
is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme);
if is_quoted_metric_name {
self.writer.write_str("{")?;
self.writer.write_str("\"")?;
Expand All @@ -550,7 +546,7 @@ impl MetricEncoder<'_> {
self.writer.write_str(prefix.as_str())?;
self.writer.write_str("_")?;
}
self.writer.write_str(escaped_name.as_str())?;
self.writer.write_str(&escaped_name)?;
if let Some(unit) = self.unit {
self.writer.write_str("_")?;
self.writer.write_str(unit.as_str())?;
Expand All @@ -576,14 +572,12 @@ impl MetricEncoder<'_> {
let mut escaped_prefix: Option<&Prefix> = None;
let escaped_prefix_value: Prefix;
if let Some(prefix) = self.prefix {
escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme));
escaped_prefix_value =
Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned());
escaped_prefix = Some(&escaped_prefix_value);
}
let is_quoted_metric_name = is_quoted_metric_name(
escaped_name.as_str(),
escaped_prefix,
self.name_validation_scheme,
);
let is_quoted_metric_name =
is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme);
if self.const_labels.is_empty()
&& additional_labels.is_none()
&& self.family_labels.is_none()
Expand Down Expand Up @@ -750,8 +744,8 @@ impl ExemplarValueEncoder<'_> {
pub(crate) struct LabelSetEncoder<'a> {
writer: &'a mut dyn Write,
first: bool,
name_validation_scheme: &'a ValidationScheme,
escaping_scheme: &'a EscapingScheme,
name_validation_scheme: ValidationScheme,
escaping_scheme: EscapingScheme,
}

impl std::fmt::Debug for LabelSetEncoder<'_> {
Expand All @@ -765,8 +759,8 @@ impl std::fmt::Debug for LabelSetEncoder<'_> {
impl<'a> LabelSetEncoder<'a> {
fn new(
writer: &'a mut dyn Write,
name_validation_scheme: &'a ValidationScheme,
escaping_scheme: &'a EscapingScheme,
name_validation_scheme: ValidationScheme,
escaping_scheme: EscapingScheme,
) -> Self {
Self {
writer,
Expand All @@ -791,8 +785,8 @@ impl<'a> LabelSetEncoder<'a> {
pub(crate) struct LabelEncoder<'a> {
writer: &'a mut dyn Write,
first: bool,
name_validation_scheme: &'a ValidationScheme,
escaping_scheme: &'a EscapingScheme,
name_validation_scheme: ValidationScheme,
escaping_scheme: EscapingScheme,
}

impl std::fmt::Debug for LabelEncoder<'_> {
Expand All @@ -818,8 +812,8 @@ impl LabelEncoder<'_> {

pub(crate) struct LabelKeyEncoder<'a> {
writer: &'a mut dyn Write,
name_validation_scheme: &'a ValidationScheme,
escaping_scheme: &'a EscapingScheme,
name_validation_scheme: ValidationScheme,
escaping_scheme: EscapingScheme,
}

impl std::fmt::Debug for LabelKeyEncoder<'_> {
Expand All @@ -840,12 +834,11 @@ impl<'a> LabelKeyEncoder<'a> {
impl std::fmt::Write for LabelKeyEncoder<'_> {
fn write_str(&mut self, s: &str) -> std::fmt::Result {
let escaped_name = escape_name(s, self.escaping_scheme);
let is_quoted_label_name =
is_quoted_label_name(escaped_name.as_str(), self.name_validation_scheme);
let is_quoted_label_name = is_quoted_label_name(&escaped_name, self.name_validation_scheme);
if is_quoted_label_name {
self.writer.write_str("\"")?;
}
self.writer.write_str(escaped_name.as_str())?;
self.writer.write_str(&escaped_name)?;
if is_quoted_label_name {
self.writer.write_str("\"")?;
}
Expand Down Expand Up @@ -1043,8 +1036,7 @@ mod tests {

encode(&mut encoded, &registry).unwrap();

let expected = "# HELP \"my.gauge\"\" My\ngau\nge\".\n"
.to_owned()
let expected = "# HELP \"my.gauge\"\" My\ngau\nge\".\n".to_owned()
+ "# TYPE \"my.gauge\"\" gauge\n"
+ "{\"my.gauge\"\"} inf\n"
+ "# EOF\n";
Expand Down Expand Up @@ -1646,8 +1638,8 @@ mod tests {
unit: None,
const_labels: &[],
family_labels: None,
name_validation_scheme: &UTF8Validation,
escaping_scheme: &NoEscaping,
name_validation_scheme: UTF8Validation,
escaping_scheme: NoEscaping,
};

encoder.encode_labels::<NoLabelSet>(None).unwrap();
Expand All @@ -1667,8 +1659,8 @@ mod tests {
unit: None,
const_labels: &const_labels,
family_labels: Some(&family_labels),
name_validation_scheme: &UTF8Validation,
escaping_scheme: &NoEscaping,
name_validation_scheme: UTF8Validation,
escaping_scheme: NoEscaping,
};

encoder.encode_labels(Some(&additional_labels)).unwrap();
Expand All @@ -1688,12 +1680,15 @@ mod tests {
unit: None,
const_labels: &const_labels,
family_labels: Some(&family_labels),
name_validation_scheme: &UTF8Validation,
escaping_scheme: &NoEscaping,
name_validation_scheme: UTF8Validation,
escaping_scheme: NoEscaping,
};

encoder.encode_labels(Some(&additional_labels)).unwrap();
assert_eq!(buffer, "{\"service.name\"=\"t1\",\"whatever\\whatever\"=\"t2\",\"t*3\"=\"t3\"}");
assert_eq!(
buffer,
"{\"service.name\"=\"t1\",\"whatever\\whatever\"=\"t2\",\"t*3\"=\"t3\"}"
);
}

fn parse_with_python_client(input: String) {
Expand Down
Loading

0 comments on commit f11d11c

Please sign in to comment.