From b50a012d1302afb40dd49d1d5796d5fd74754d04 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 15 Jan 2025 21:18:04 +0100 Subject: [PATCH] comm: implement the ordering check A few comments: * skip if the two args are pointing to the same file * skip if the same content in the two files * implement --check-order * implement --nocheck-order * output the right things on stderr Should pass: tests/misc/comm --- src/uu/comm/Cargo.toml | 2 +- src/uu/comm/src/comm.rs | 160 +++++++++++++++++++++++++++++++++++-- tests/by-util/test_comm.rs | 153 ++++++++++++++++++++++++++++++----- 3 files changed, 291 insertions(+), 24 deletions(-) diff --git a/src/uu/comm/Cargo.toml b/src/uu/comm/Cargo.toml index dd691971555..ce250c554c3 100644 --- a/src/uu/comm/Cargo.toml +++ b/src/uu/comm/Cargo.toml @@ -18,7 +18,7 @@ path = "src/comm.rs" [dependencies] clap = { workspace = true } -uucore = { workspace = true } +uucore = { workspace = true, features = ["fs"] } [[bin]] name = "comm" diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index ae57b8bf8a0..e075830cb85 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -3,12 +3,13 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) delim mkdelim +// spell-checker:ignore (ToDO) delim mkdelim pairable use std::cmp::Ordering; use std::fs::{metadata, File}; -use std::io::{self, stdin, BufRead, BufReader, Stdin}; +use std::io::{self, stdin, BufRead, BufReader, Read, Stdin}; use uucore::error::{FromIo, UResult, USimpleError}; +use uucore::fs::paths_refer_to_same_file; use uucore::line_ending::LineEnding; use uucore::{format_usage, help_about, help_usage}; @@ -27,6 +28,30 @@ mod options { pub const FILE_2: &str = "FILE2"; pub const TOTAL: &str = "total"; pub const ZERO_TERMINATED: &str = "zero-terminated"; + pub const CHECK_ORDER: &str = "check-order"; + pub const NO_CHECK_ORDER: &str = "nocheck-order"; +} + +#[derive(Debug, Clone, Copy)] +enum FileNumber { + One, + Two, +} + +impl FileNumber { + fn as_str(&self) -> &'static str { + match self { + FileNumber::One => "1", + FileNumber::Two => "2", + } + } +} + +struct OrderChecker { + last_line: Vec, + file_num: FileNumber, + check_order: bool, + has_error: bool, } enum Input { @@ -60,7 +85,74 @@ impl LineReader { } } -fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) { +impl OrderChecker { + fn new(file_num: FileNumber, check_order: bool) -> Self { + Self { + last_line: Vec::new(), + file_num, + check_order, + has_error: false, + } + } + + fn verify_order(&mut self, current_line: &[u8]) -> bool { + if self.last_line.is_empty() { + self.last_line = current_line.to_vec(); + return true; + } + + let is_ordered = current_line >= &self.last_line; + if !is_ordered && !self.has_error { + eprintln!( + "comm: file {} is not in sorted order", + self.file_num.as_str() + ); + self.has_error = true; + } + + self.last_line = current_line.to_vec(); + is_ordered || !self.check_order + } +} + +// Check if two files are identical by comparing their contents +pub fn are_files_identical(path1: &str, path2: &str) -> io::Result { + // First compare file sizes + let metadata1 = std::fs::metadata(path1)?; + let metadata2 = std::fs::metadata(path2)?; + + if metadata1.len() != metadata2.len() { + return Ok(false); + } + + let file1 = File::open(path1)?; + let file2 = File::open(path2)?; + + let mut reader1 = BufReader::new(file1); + let mut reader2 = BufReader::new(file2); + + let mut buffer1 = [0; 8192]; + let mut buffer2 = [0; 8192]; + + loop { + let bytes1 = reader1.read(&mut buffer1)?; + let bytes2 = reader2.read(&mut buffer2)?; + + if bytes1 != bytes2 { + return Ok(false); + } + + if bytes1 == 0 { + return Ok(true); + } + + if buffer1[..bytes1] != buffer2[..bytes2] { + return Ok(false); + } + } +} + +fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> { let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1)); let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2)); @@ -76,6 +168,26 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) let mut total_col_2 = 0; let mut total_col_3 = 0; + let check_order = opts.get_flag(options::CHECK_ORDER); + let no_check_order = opts.get_flag(options::NO_CHECK_ORDER); + + // Determine if we should perform order checking + let should_check_order = !no_check_order + && (check_order + || if let (Some(file1), Some(file2)) = ( + opts.get_one::(options::FILE_1), + opts.get_one::(options::FILE_2), + ) { + !(paths_refer_to_same_file(file1, file2, true) + || are_files_identical(file1, file2).unwrap_or(false)) + } else { + true + }); + + let mut checker1 = OrderChecker::new(FileNumber::One, check_order); + let mut checker2 = OrderChecker::new(FileNumber::Two, check_order); + let mut input_error = false; + while na.is_ok() || nb.is_ok() { let ord = match (na.is_ok(), nb.is_ok()) { (false, true) => Ordering::Greater, @@ -91,6 +203,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) match ord { Ordering::Less => { + if should_check_order && !checker1.verify_order(ra) { + break; + } if !opts.get_flag(options::COLUMN_1) { print!("{}", String::from_utf8_lossy(ra)); } @@ -99,6 +214,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) total_col_1 += 1; } Ordering::Greater => { + if should_check_order && !checker2.verify_order(rb) { + break; + } if !opts.get_flag(options::COLUMN_2) { print!("{delim_col_2}{}", String::from_utf8_lossy(rb)); } @@ -107,6 +225,10 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) total_col_2 += 1; } Ordering::Equal => { + if should_check_order && (!checker1.verify_order(ra) || !checker2.verify_order(rb)) + { + break; + } if !opts.get_flag(options::COLUMN_3) { print!("{delim_col_3}{}", String::from_utf8_lossy(ra)); } @@ -117,12 +239,27 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) total_col_3 += 1; } } + + // Track if we've seen any order errors + if (checker1.has_error || checker2.has_error) && !input_error && !check_order { + input_error = true; + } } if opts.get_flag(options::TOTAL) { let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED)); print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}"); } + + if should_check_order && (checker1.has_error || checker2.has_error) { + // Print the input error message once at the end + if input_error { + eprintln!("comm: input is not in sorted order"); + } + Err(USimpleError::new(1, "")) + } else { + Ok(()) + } } fn open_file(name: &str, line_ending: LineEnding) -> io::Result { @@ -170,8 +307,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { "" => "\0", delim => delim, }; - comm(&mut f1, &mut f2, delim, &matches); - Ok(()) + + comm(&mut f1, &mut f2, delim, &matches) } pub fn uu_app() -> Command { @@ -233,4 +370,17 @@ pub fn uu_app() -> Command { .help("output a summary") .action(ArgAction::SetTrue), ) + .arg( + Arg::new(options::CHECK_ORDER) + .long(options::CHECK_ORDER) + .help("check that the input is correctly sorted, even if all input lines are pairable") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(options::NO_CHECK_ORDER) + .long(options::NO_CHECK_ORDER) + .help("do not check that the input is correctly sorted") + .action(ArgAction::SetTrue) + .conflicts_with(options::CHECK_ORDER), + ) } diff --git a/tests/by-util/test_comm.rs b/tests/by-util/test_comm.rs index ac0d11c0908..bad00b1290e 100644 --- a/tests/by-util/test_comm.rs +++ b/tests/by-util/test_comm.rs @@ -352,11 +352,9 @@ fn defaultcheck_order() { // * the first: if both files are not in order, the default behavior is the only // behavior that will provide an error message - // * the second: if two rows are paired but are out of order, // it won't matter if all rows in the two files are exactly the same. // This is specified in the documentation - #[test] fn defaultcheck_order_identical_bad_order_files() { let scene = TestScenario::new(util_name!()); @@ -367,21 +365,13 @@ fn defaultcheck_order_identical_bad_order_files() { .args(&["bad_order_1", "bad_order_1"]) .succeeds() .stdout_is("\t\te\n\t\td\n\t\tb\n\t\ta\n"); -} - -#[cfg_attr(not(feature = "test_unimplemented"), ignore)] -#[test] -fn defaultcheck_order_two_different_bad_order_files() { - let scene = TestScenario::new(util_name!()); - let at = &scene.fixtures; - at.write("bad_order_1", "e\nd\nb\na\n"); - at.write("bad_order_2", "e\nc\nb\na\n"); scene .ucmd() - .args(&["bad_order_1", "bad_order_2"]) + .arg("--check-order") + .args(&["bad_order_1", "bad_order_1"]) .fails() - .stdout_is("\t\te\n\tc\n\tb\n\ta\nd\nb\na\n") - .stderr_is("error to be defined"); + .stdout_is("\t\te\n") + .stderr_is("comm: file 1 is not in sorted order\n"); } // * the third: (it is not know whether this is a bug or not) @@ -389,22 +379,20 @@ fn defaultcheck_order_two_different_bad_order_files() { // where both lines are different and one or both file lines being // compared are out of order from the preceding line, // it is ignored and no errors occur. - // * the fourth: (it is not known whether this is a bug or not) // there are additional, not-yet-understood circumstances where an out-of-order // pair is ignored and is not counted against the 1 maximum out-of-order line. - -#[cfg_attr(not(feature = "test_unimplemented"), ignore)] #[test] fn unintuitive_default_behavior_1() { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; at.write("defaultcheck_unintuitive_1", "m\nh\nn\no\nc\np\n"); at.write("defaultcheck_unintuitive_2", "m\nh\nn\no\np\n"); + // Here, GNU does not fail, but uutils does scene .ucmd() .args(&["defaultcheck_unintuitive_1", "defaultcheck_unintuitive_2"]) - .succeeds() + .fails() .stdout_is("\t\tm\n\t\th\n\t\tn\n\t\to\nc\n\t\tp\n"); } @@ -458,3 +446,132 @@ fn test_is_dir() { .fails() .stderr_only("comm: .: Is a directory\n"); } + +#[test] +fn test_sorted() { + let expected_stderr = + "comm: file 2 is not in sorted order\ncomm: input is not in sorted order\n"; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.write("comm1", "1\n3"); + at.write("comm2", "3\n2"); + scene + .ucmd() + .args(&["comm1", "comm2"]) + .fails() + .code_is(1) + .stdout_is("1\n\t\t3\n\t2\n") + .stderr_is(expected_stderr); +} + +#[test] +fn test_sorted_check_order() { + let expected_stderr = "comm: file 2 is not in sorted order\n"; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.write("comm1", "1\n3"); + at.write("comm2", "3\n2"); + scene + .ucmd() + .arg("--check-order") + .args(&["comm1", "comm2"]) + .fails() + .code_is(1) + .stdout_is("1\n\t\t3\n") + .stderr_is(expected_stderr); +} + +#[test] +fn test_both_inputs_out_of_order() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.write("file_a", "3\n1\n0\n"); + at.write("file_b", "3\n2\n0\n"); + + scene + .ucmd() + .args(&["file_a", "file_b"]) + .fails() + .code_is(1) + .stdout_is("\t\t3\n1\n0\n\t2\n\t0\n") + .stderr_is( + "comm: file 1 is not in sorted order\n\ + comm: file 2 is not in sorted order\n\ + comm: input is not in sorted order\n", + ); +} + +#[test] +fn test_both_inputs_out_of_order_last_pair() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.write("file_a", "3\n1\n"); + at.write("file_b", "3\n2\n"); + + scene + .ucmd() + .args(&["file_a", "file_b"]) + .fails() + .code_is(1) + .stdout_is("\t\t3\n1\n\t2\n") + .stderr_is( + "comm: file 1 is not in sorted order\n\ + comm: file 2 is not in sorted order\n\ + comm: input is not in sorted order\n", + ); +} + +#[test] +fn test_first_input_out_of_order_extended() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.write("file_a", "0\n3\n1\n"); + at.write("file_b", "2\n3\n"); + + scene + .ucmd() + .args(&["file_a", "file_b"]) + .fails() + .code_is(1) + .stdout_is("0\n\t2\n\t\t3\n1\n") + .stderr_is( + "comm: file 1 is not in sorted order\n\ + comm: input is not in sorted order\n", + ); +} + +#[test] +fn test_out_of_order_input_nocheck() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + // Create input files + at.write("file_a", "1\n3\n"); + at.write("file_b", "3\n2\n"); + + scene + .ucmd() + .arg("--nocheck-order") + .args(&["file_a", "file_b"]) + .succeeds() + .stdout_is("1\n\t\t3\n\t2\n") + .no_stderr(); +} + +#[test] +fn test_both_inputs_out_of_order_but_identical() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("file_a", "2\n1\n0\n"); + at.write("file_b", "2\n1\n0\n"); + + scene + .ucmd() + .args(&["file_a", "file_b"]) + .succeeds() + .stdout_is("\t\t2\n\t\t1\n\t\t0\n") + .no_stderr(); +}