Skip to content

Commit

Permalink
Merge pull request #6134 from sargas/expr-error-messages
Browse files Browse the repository at this point in the history
expr: Fix assorted test errors in tests/expr/expr.pl
  • Loading branch information
sylvestre authored Jan 19, 2025
2 parents b6f6d93 + 1e0f697 commit 34421f5
Show file tree
Hide file tree
Showing 2 changed files with 259 additions and 10 deletions.
26 changes: 25 additions & 1 deletion src/uu/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ pub enum ExprError {
DivisionByZero,
InvalidRegexExpression,
ExpectedClosingBraceAfter(String),
ExpectedClosingBraceInsteadOf(String),
UnmatchedOpeningParenthesis,
UnmatchedClosingParenthesis,
UnmatchedOpeningBrace,
UnmatchedClosingBrace,
InvalidContent(String),
}

impl Display for ExprError {
Expand All @@ -50,7 +56,25 @@ impl Display for ExprError {
Self::DivisionByZero => write!(f, "division by zero"),
Self::InvalidRegexExpression => write!(f, "Invalid regex expression"),
Self::ExpectedClosingBraceAfter(s) => {
write!(f, "expected ')' after {}", s.quote())
write!(f, "syntax error: expecting ')' after {}", s.quote())
}
Self::ExpectedClosingBraceInsteadOf(s) => {
write!(f, "syntax error: expecting ')' instead of {}", s.quote())
}
Self::UnmatchedOpeningParenthesis => {
write!(f, "Unmatched ( or \\(")
}
Self::UnmatchedClosingParenthesis => {
write!(f, "Unmatched ) or \\)")
}
Self::UnmatchedOpeningBrace => {
write!(f, "Unmatched \\{{")
}
Self::UnmatchedClosingBrace => {
write!(f, "Unmatched ) or \\}}")
}
Self::InvalidContent(s) => {
write!(f, "Invalid content of {}", s)
}
}
}
Expand Down
243 changes: 234 additions & 9 deletions src/uu/expr/src/syntax_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// spell-checker:ignore (ToDO) ints paren prec multibytes

use num_bigint::{BigInt, ParseBigIntError};
use num_traits::ToPrimitive;
use num_traits::{ToPrimitive, Zero};
use onig::{Regex, RegexOptions, Syntax};

use crate::{ExprError, ExprResult};
Expand Down Expand Up @@ -139,7 +139,9 @@ impl StringOp {
Self::Match => {
let left = left.eval()?.eval_as_string();
let right = right.eval()?.eval_as_string();
let re_string = format!("^{right}");
check_posix_regex_errors(&right)?;
let prefix = if right.starts_with('*') { r"^\" } else { "^" };
let re_string = format!("{prefix}{right}");
let re = Regex::with_options(
&re_string,
RegexOptions::REGEX_OPTION_NONE,
Expand All @@ -148,7 +150,7 @@ impl StringOp {
.map_err(|_| ExprError::InvalidRegexExpression)?;
Ok(if re.captures_len() > 0 {
re.captures(&left)
.map(|captures| captures.at(1).unwrap())
.and_then(|captures| captures.at(1))
.unwrap_or("")
.to_string()
} else {
Expand All @@ -173,6 +175,99 @@ impl StringOp {
}
}

/// Check for errors in a supplied regular expression
///
/// GNU coreutils shows messages for invalid regular expressions
/// differently from the oniguruma library used by the regex crate.
/// This method attempts to do these checks manually in one pass
/// through the regular expression.
///
/// This method is not comprehensively checking all cases in which
/// a regular expression could be invalid; any cases not caught will
/// result in a [ExprError::InvalidRegexExpression] when passing the
/// regular expression through the Oniguruma bindings. This method is
/// intended to just identify a few situations for which GNU coreutils
/// has specific error messages.
fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
let mut escaped_parens: u64 = 0;
let mut escaped_braces: u64 = 0;
let mut escaped = false;

let mut repeating_pattern_text = String::new();
let mut invalid_content_error = false;

for c in pattern.chars() {
match (escaped, c) {
(true, ')') => {
escaped_parens = escaped_parens
.checked_sub(1)
.ok_or(ExprError::UnmatchedClosingParenthesis)?;
}
(true, '(') => {
escaped_parens += 1;
}
(true, '}') => {
escaped_braces = escaped_braces
.checked_sub(1)
.ok_or(ExprError::UnmatchedClosingBrace)?;
let mut repetition =
repeating_pattern_text[..repeating_pattern_text.len() - 1].splitn(2, ',');
match (
repetition
.next()
.expect("splitn always returns at least one string"),
repetition.next(),
) {
("", None) => {
// Empty repeating pattern
invalid_content_error = true;
}
(x, None) | (x, Some("")) => {
if x.parse::<i16>().is_err() {
invalid_content_error = true;
}
}
("", Some(x)) => {
if x.parse::<i16>().is_err() {
invalid_content_error = true;
}
}
(f, Some(l)) => {
if let (Ok(f), Ok(l)) = (f.parse::<i16>(), l.parse::<i16>()) {
invalid_content_error = invalid_content_error || f > l;
} else {
invalid_content_error = true;
}
}
}
repeating_pattern_text.clear();
}
(true, '{') => {
escaped_braces += 1;
}
_ => {
if escaped_braces > 0 && repeating_pattern_text.len() <= 13 {
repeating_pattern_text.push(c);
}
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
invalid_content_error = true;
}
}
}
escaped = !escaped && c == '\\';
}
match (
escaped_parens.is_zero(),
escaped_braces.is_zero(),
invalid_content_error,
) {
(true, true, false) => Ok(()),
(_, false, _) => Err(ExprError::UnmatchedOpeningBrace),
(false, _, _) => Err(ExprError::UnmatchedOpeningParenthesis),
(true, true, true) => Err(ExprError::InvalidContent(r"\{\}".to_string())),
}
}

/// Precedence for infix binary operators
const PRECEDENCE: &[&[(&str, BinOp)]] = &[
&[("|", BinOp::String(StringOp::Or))],
Expand Down Expand Up @@ -442,13 +537,21 @@ impl<'a> Parser<'a> {
},
"(" => {
let s = self.parse_expression()?;
let close_paren = self.next()?;
if close_paren != ")" {
match self.next() {
Ok(")") => {}
// Since we have parsed at least a '(', there will be a token
// at `self.index - 1`. So this indexing won't panic.
return Err(ExprError::ExpectedClosingBraceAfter(
self.input[self.index - 1].into(),
));
Ok(_) => {
return Err(ExprError::ExpectedClosingBraceInsteadOf(
self.input[self.index - 1].into(),
));
}
Err(ExprError::MissingArgument(_)) => {
return Err(ExprError::ExpectedClosingBraceAfter(
self.input[self.index - 1].into(),
));
}
Err(e) => return Err(e),
}
s
}
Expand Down Expand Up @@ -484,7 +587,10 @@ pub fn is_truthy(s: &NumOrStr) -> bool {

#[cfg(test)]
mod test {
use super::{AstNode, BinOp, NumericOp, RelationOp, StringOp};
use crate::ExprError;
use crate::ExprError::InvalidContent;

use super::{check_posix_regex_errors, AstNode, BinOp, NumericOp, RelationOp, StringOp};

impl From<&str> for AstNode {
fn from(value: &str) -> Self {
Expand Down Expand Up @@ -587,4 +693,123 @@ mod test {
)),
);
}

#[test]
fn missing_closing_parenthesis() {
assert_eq!(
AstNode::parse(&["(", "42"]),
Err(ExprError::ExpectedClosingBraceAfter("42".to_string()))
);
assert_eq!(
AstNode::parse(&["(", "42", "a"]),
Err(ExprError::ExpectedClosingBraceInsteadOf("a".to_string()))
);
}

#[test]
fn empty_substitution() {
// causes a panic in 0.0.25
let result = AstNode::parse(&["a", ":", r"\(b\)*"])
.unwrap()
.eval()
.unwrap();
assert_eq!(result.eval_as_string(), "");
}

#[test]
fn starting_stars_become_escaped() {
let result = AstNode::parse(&["cats", ":", r"*cats"])
.unwrap()
.eval()
.unwrap();
assert_eq!(result.eval_as_string(), "0");

let result = AstNode::parse(&["*cats", ":", r"*cats"])
.unwrap()
.eval()
.unwrap();
assert_eq!(result.eval_as_string(), "5");
}

#[test]
fn only_match_in_beginning() {
let result = AstNode::parse(&["budget", ":", r"get"])
.unwrap()
.eval()
.unwrap();
assert_eq!(result.eval_as_string(), "0");
}

#[test]
fn check_regex_valid() {
assert!(check_posix_regex_errors(r"(a+b) \(a* b\)").is_ok());
}

#[test]
fn check_regex_simple_repeating_pattern() {
assert!(check_posix_regex_errors(r"\(a+b\)\{4\}").is_ok());
}

#[test]
fn check_regex_missing_closing() {
assert_eq!(
check_posix_regex_errors(r"\(abc"),
Err(ExprError::UnmatchedOpeningParenthesis)
);

assert_eq!(
check_posix_regex_errors(r"\{1,2"),
Err(ExprError::UnmatchedOpeningBrace)
);
}

#[test]
fn check_regex_missing_opening() {
assert_eq!(
check_posix_regex_errors(r"abc\)"),
Err(ExprError::UnmatchedClosingParenthesis)
);

assert_eq!(
check_posix_regex_errors(r"abc\}"),
Err(ExprError::UnmatchedClosingBrace)
);
}

#[test]
fn check_regex_empty_repeating_pattern() {
assert_eq!(
check_posix_regex_errors("ab\\{\\}"),
Err(InvalidContent(r"\{\}".to_string()))
)
}

#[test]
fn check_regex_intervals_two_numbers() {
assert_eq!(
// out of order
check_posix_regex_errors("ab\\{1,0\\}"),
Err(InvalidContent(r"\{\}".to_string()))
);
assert_eq!(
check_posix_regex_errors("ab\\{1,a\\}"),
Err(InvalidContent(r"\{\}".to_string()))
);
assert_eq!(
check_posix_regex_errors("ab\\{a,3\\}"),
Err(InvalidContent(r"\{\}".to_string()))
);
assert_eq!(
check_posix_regex_errors("ab\\{a,b\\}"),
Err(InvalidContent(r"\{\}".to_string()))
);
assert_eq!(
check_posix_regex_errors("ab\\{a,\\}"),
Err(InvalidContent(r"\{\}".to_string()))
);
assert_eq!(
check_posix_regex_errors("ab\\{,b\\}"),
Err(InvalidContent(r"\{\}".to_string()))
);
}
}

0 comments on commit 34421f5

Please sign in to comment.