diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index a9287a3c673..f4ed25a97ae 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -355,7 +355,7 @@ fn execute( arguments_after_options: ValuesRef<'_, OsString>, ) -> UResult<()> { for (i, input) in arguments_after_options.enumerate() { - let bytes = uucore::format::bytes_from_os_str(input)?; + let bytes = uucore::format::try_get_bytes_from_os_str(input)?; if i > 0 { stdout_lock.write_all(b" ")?; diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index 217d3ef6181..eac02ae6b86 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -11,7 +11,9 @@ use std::ffi::OsString; use std::io::stdout; use std::ops::ControlFlow; use uucore::error::{UResult, UUsageError}; -use uucore::format::{bytes_from_os_str, parse_spec_and_escape, FormatArgument, FormatItem}; +use uucore::format::{ + parse_spec_and_escape, try_get_bytes_from_os_str, FormatArgument, FormatItem, +}; use uucore::{format_usage, help_about, help_section, help_usage}; const VERSION: &str = "version"; @@ -33,7 +35,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .get_one::(options::FORMAT) .ok_or_else(|| UUsageError::new(1, "missing operand"))?; - let format_bytes = bytes_from_os_str(format)?; + let format_bytes = try_get_bytes_from_os_str(format)?; let values = match matches.get_many::(options::ARGUMENT) { Some(os_string) => os_string diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 5ec847a9549..090817acaa2 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -3,14 +3,19 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use super::FormatError; use crate::{ - error::{set_exit_code, UResult, USimpleError}, + error::{set_exit_code, UError}, features::format::num_parser::{ParseError, ParsedNumber}, quoting_style::{escape_name, Quotes, QuotingStyle}, - show, show_error, show_warning, + show_error, show_warning, }; use os_display::Quotable; -use std::ffi::{OsStr, OsString}; +use std::{ + error::Error, + ffi::{OsStr, OsString}, + fmt::Display, +}; /// An argument for formatting /// @@ -31,70 +36,70 @@ pub enum FormatArgument { } pub trait ArgumentIter<'a>: Iterator { - fn get_char(&mut self) -> u8; - fn get_i64(&mut self) -> i64; - fn get_u64(&mut self) -> u64; - fn get_f64(&mut self) -> f64; + fn get_char(&mut self) -> Result; + fn get_i64(&mut self) -> Result; + fn get_u64(&mut self) -> Result; + fn get_f64(&mut self) -> Result; fn get_str(&mut self) -> &'a OsStr; } impl<'a, T: Iterator> ArgumentIter<'a> for T { - fn get_char(&mut self) -> u8 { + fn get_char(&mut self) -> Result { let Some(next) = self.next() else { - return b'\0'; + return Ok(b'\0'); }; match next { - FormatArgument::Char(c) => *c as u8, - FormatArgument::Unparsed(os) => match bytes_from_os_str(os).unwrap().first() { - Some(&byte) => byte, - None => b'\0', + FormatArgument::Char(c) => Ok(*c as u8), + FormatArgument::Unparsed(os) => match try_get_bytes_from_os_str(os)?.first() { + Some(&byte) => Ok(byte), + None => Ok(b'\0'), }, - _ => b'\0', + _ => Ok(b'\0'), } } - fn get_u64(&mut self) -> u64 { + fn get_u64(&mut self) -> Result { let Some(next) = self.next() else { - return 0; + return Ok(0); }; match next { - FormatArgument::UnsignedInt(n) => *n, + FormatArgument::UnsignedInt(n) => Ok(*n), FormatArgument::Unparsed(os) => { - let str = get_str_or_exit_with_error(os); + let str = try_get_str_from_os_str(os)?; - extract_value(ParsedNumber::parse_u64(str), str) + Ok(extract_value(ParsedNumber::parse_u64(str), str)) } - _ => 0, + _ => Ok(0), } } - fn get_i64(&mut self) -> i64 { + fn get_i64(&mut self) -> Result { let Some(next) = self.next() else { - return 0; + return Ok(0); }; match next { - FormatArgument::SignedInt(n) => *n, + FormatArgument::SignedInt(n) => Ok(*n), FormatArgument::Unparsed(os) => { - let str = get_str_or_exit_with_error(os); + let str = try_get_str_from_os_str(os)?; - extract_value(ParsedNumber::parse_i64(str), str) + Ok(extract_value(ParsedNumber::parse_i64(str), str)) } - _ => 0, + _ => Ok(0), } } - fn get_f64(&mut self) -> f64 { + fn get_f64(&mut self) -> Result { let Some(next) = self.next() else { - return 0.0; + return Ok(0.0); }; match next { - FormatArgument::Float(n) => *n, + FormatArgument::Float(n) => Ok(*n), FormatArgument::Unparsed(os) => { - let str = get_str_or_exit_with_error(os); + let str = try_get_str_from_os_str(os)?; - extract_value(ParsedNumber::parse_f64(str), str) + Ok(extract_value(ParsedNumber::parse_f64(str), str)) } - _ => 0.0, + _ => Ok(0.0), } } @@ -135,6 +140,7 @@ fn extract_value(p: Result>, input: &str) -> T } else { show_error!("{}: value not completely converted", input.quote()); } + v } } @@ -142,7 +148,33 @@ fn extract_value(p: Result>, input: &str) -> T } } -pub fn bytes_from_os_str(input: &OsStr) -> UResult<&[u8]> { +#[derive(Debug)] +pub struct NonUtf8OsStr(pub String); + +impl Display for NonUtf8OsStr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "invalid (non-UTF-8) string like {} encountered", + self.0.quote(), + )) + } +} + +impl Error for NonUtf8OsStr {} +impl UError for NonUtf8OsStr {} + +pub fn try_get_str_from_os_str(os_str: &OsStr) -> Result<&str, NonUtf8OsStr> { + match os_str.to_str() { + Some(st) => Ok(st), + None => { + let cow = os_str.to_string_lossy(); + + Err(NonUtf8OsStr(cow.to_string())) + } + } +} + +pub fn try_get_bytes_from_os_str(input: &OsStr) -> Result<&[u8], NonUtf8OsStr> { let result = { #[cfg(target_family = "unix")] { @@ -153,38 +185,18 @@ pub fn bytes_from_os_str(input: &OsStr) -> UResult<&[u8]> { #[cfg(not(target_family = "unix"))] { - use crate::error::USimpleError; - // TODO // Verify that this works correctly on these platforms match input.to_str().map(|st| st.as_bytes()) { Some(sl) => Ok(sl), - None => Err(USimpleError::new( - 1, - "non-UTF-8 string encountered when not allowed", - )), + None => { + let cow = input.to_string_lossy(); + + Err(NonUtf8OsStr(cow.to_string())) + } } } }; result } - -fn get_str_or_exit_with_error(os_str: &OsStr) -> &str { - match os_str.to_str() { - Some(st) => st, - None => { - let cow = os_str.to_string_lossy(); - - let quoted = cow.quote(); - - let error = format!( - "argument like {quoted} is not a valid UTF-8 string, and could not be parsed as an integer", - ); - - show!(USimpleError::new(1, error.clone())); - - panic!("{error}"); - } - } -} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 6d7a2ee3079..2bc6db16bfe 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -38,6 +38,7 @@ pub mod num_parser; mod spec; pub use argument::*; +use os_display::Quotable; use spec::Spec; use std::{ error::Error, @@ -63,6 +64,7 @@ pub enum FormatError { NeedAtLeastOneSpec(Vec), WrongSpecType, InvalidPrecision(String), + InvalidEncoding(NonUtf8OsStr), } impl Error for FormatError {} @@ -74,6 +76,12 @@ impl From for FormatError { } } +impl From for FormatError { + fn from(value: NonUtf8OsStr) -> FormatError { + FormatError::InvalidEncoding(value) + } +} + impl Display for FormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -98,6 +106,13 @@ impl Display for FormatError { Self::IoError(_) => write!(f, "io error"), Self::NoMoreArguments => write!(f, "no more arguments"), Self::InvalidArgument(_) => write!(f, "invalid argument"), + Self::InvalidEncoding(no) => { + write!( + f, + "invalid (non-UTF-8) argument like {} encountered", + no.0.quote() + ) + } } } } diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index e3b88fd68d4..fa8ab2ac196 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -6,12 +6,11 @@ // spell-checker:ignore (vars) intmax ptrdiff padlen use super::{ - bytes_from_os_str, num_format::{ self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, }, - parse_escape_only, ArgumentIter, FormatChar, FormatError, + parse_escape_only, try_get_bytes_from_os_str, ArgumentIter, FormatChar, FormatError, }; use crate::quoting_style::{escape_name, QuotingStyle}; use std::{io::Write, ops::ControlFlow}; @@ -315,7 +314,7 @@ impl Spec { match self { Self::Char { width, align_left } => { let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); - write_padded(writer, &[args.get_char()], width, *align_left) + write_padded(writer, &[args.get_char()?], width, *align_left) } Self::String { width, @@ -334,7 +333,7 @@ impl Spec { let os_str = args.get_str(); - let bytes = bytes_from_os_str(os_str).unwrap(); + let bytes = try_get_bytes_from_os_str(os_str).unwrap(); let truncated = match precision { Some(p) if p < os_str.len() => &bytes[..p], @@ -346,7 +345,7 @@ impl Spec { Self::EscapedString => { let os_str = args.get_str(); - let bytes = bytes_from_os_str(os_str).unwrap(); + let bytes = try_get_bytes_from_os_str(os_str).unwrap(); let mut parsed = Vec::::new(); @@ -386,7 +385,7 @@ impl Spec { } => { let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0); - let i = args.get_i64(); + let i = args.get_i64()?; if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); @@ -409,7 +408,7 @@ impl Spec { } => { let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0); - let i = args.get_u64(); + let i = args.get_u64()?; if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); @@ -435,7 +434,7 @@ impl Spec { } => { let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0); let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(6); - let f = args.get_f64(); + let f = args.get_f64()?; if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); @@ -463,7 +462,7 @@ fn resolve_asterisk<'a>( ) -> Result, FormatError> { Ok(match option { None => None, - Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()).ok().unwrap_or(0)), + Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()?).ok().unwrap_or(0)), Some(CanAsterisk::Fixed(w)) => Some(w), }) } diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 41a247dc9d2..be1a10093f1 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -941,4 +941,11 @@ fn non_utf_8_input() { .arg(os_str) .succeeds() .stdout_only_bytes(INPUT_AND_OUTPUT); + + new_ucmd!() + .arg("%d") + .arg(os_str) + .fails() + .stderr_only("printf: invalid (non-UTF-8) argument like 'Swer an rehte g�ete wendet s�n gem�ete, dem volget s�lde und �re.' encountered +"); }