diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 3ca8494..bfb3f34 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -34,8 +34,8 @@ jobs: include: - name: regex_match cmd: teip -r '.*sshd' < test_secure - - name: pcre_match - cmd: teip -P '.*sshd' < test_secure + - name: onig_match + cmd: teip -R '.*sshd' < test_secure - name: convert_date cmd: teip -c1-15 -- date -f- +%s < test_secure steps: @@ -48,7 +48,7 @@ jobs: - name: Build run: | rustup target add x86_64-unknown-linux-musl - CC="musl-gcc -static" cargo build --verbose --release --target x86_64-unknown-linux-musl + CFLAGS="-fPIE" CC="musl-gcc -static" cargo build --verbose --release --target x86_64-unknown-linux-musl mv target/x86_64-unknown-linux-musl/release/teip target/release - name: Prepare benchmarking diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1433ee1..ea8e756 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -37,18 +37,13 @@ jobs: - name: Install musl-gcc if required if: contains(matrix.target, 'musl') run: | - sudo apt-get install musl-tools autoconf autotools-dev - - - name: Install buildtools if required - if: contains(matrix.target, 'apple') - run: | - brew install pcre pkg-config + sudo apt-get install musl-tools - name: Compile for musl if: contains(matrix.target, 'musl') run: | rustup target add ${{ matrix.target }} - CC="musl-gcc -static" cargo build --verbose --release --target ${{ matrix.target }} + CFLAGS="-fPIE" CC="musl-gcc -static" cargo build --verbose --release --target ${{ matrix.target }} - name: Compile if: "! 
contains(matrix.target, 'musl')" @@ -63,6 +58,7 @@ jobs: mv target/${{ matrix.target }}/release/teip package/bin mkdir -p package/man cp man/teip.1 package/man + cp -r completion package/ ## sed -i is not used due to difference between macOS and Linux perl -i -pe s/___VERSION___/${{ steps.vars.outputs.ver }}/ ./package/.tar2package.yml tar zcvf "$_TAR" -C "$PWD/package" bin man .tar2package.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 28344d7..42c5300 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,16 +33,12 @@ jobs: - name: Install musl-gcc if required if: contains(matrix.target, 'musl') run: | - sudo apt-get install musl-tools autoconf autotools-dev - - name: Install buildtools if required - if: contains(matrix.target, 'apple') - run: | - brew install pcre pkg-config + sudo apt-get install musl-tools - name: Build the release target run: | rustup target add ${{ matrix.target }} if [[ ${{ matrix.target }} =~ "musl" ]] ;then - CC="musl-gcc -static" cargo build --verbose --release --target ${{ matrix.target }} + CFLAGS="-fPIE" CC="musl-gcc -static" cargo build --release --verbose --target x86_64-unknown-linux-musl else cargo build --verbose --release --target ${{ matrix.target }} fi diff --git a/Cargo.toml b/Cargo.toml index 6882933..49bc6cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "teip" -version = "1.1.0" +version = "1.1.2-beta" authors = ["Yasuhiro Yamada "] description = "Highly efficient \"Masking tape\" for standard input" keywords = ["sed", "awk", "util"] @@ -18,11 +18,7 @@ regex = "1" log = { version = "0.4", features = ["max_level_trace", "release_max_level_warn"] } env_logger = "0.7.1" lazy_static = "1.4.0" -atty = "0.2.14" -enum-set = ">= 0.0.5" - -[target.'cfg(unix)'.dependencies] -pcre = { git = "https://github.com/greymd/rust-pcre", branch = "unix" } +onig = "6" [dev-dependencies] assert_cmd = "1.0.1" diff --git a/README.md b/README.md index 
cdb6f4e..c638395 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,15 @@ $ sudo dpkg -i ./teip*.deb SHA256: 077683f9ac3cb84d712216dec3aba03f9b04c12c700cbffea2b38fbaff849872 +## With dnf (For CentOS, RHEL users) + + +```bash +$ sudo dnf install https://github.com/greymd/teip/releases/download/v1.1.0/teip-1.1.0.x86_64-unknown-linux-musl.rpm +``` + +SHA256: d00399fbd0fdd338bbd5fabfb53fc855fd686752dab945c224e766e944e0abff + ## With yum (For CentOS7, RHEL7, Amazon Linux 2 users) @@ -93,7 +102,7 @@ Unfortunately, `teip` does not work on Windows due to technical reason. ``` Usage: - teip (-r <pattern> | -P <pattern>) [-svz] [--] [<command>...] + teip (-r <pattern> | -R <pattern>) [-svz] [--] [<command>...] teip -f <list> [-d <delimiter> | -D <pattern>] [-svz] [--] [<command>...] teip -c <list> [-svz] [--] [<command>...] teip --help | --version @@ -102,7 +111,7 @@ Options: --help Display this help and exit --version Show version and exit -r <pattern> Select strings matched by given regular expression <pattern> - -P <pattern> EXPERIMENTAL: Same as -r but use Perl-compatible regular expressions (PCREs) + -R <pattern> EXPERIMENTAL: Same as -r but use Oniguruma regular expressions -f <list> Select only these white-space separated fields -d <delimiter> Use <delimiter> for field delimiter of -f -D <pattern> Use regular expression <pattern> for field delimiter of -f @@ -636,10 +645,6 @@ See this [post](https://dev.to/greymd/teip-masking-tape-for-shell-is-what-we-nee Thank you so much for helpful modules! 
-* pcre crate - - Based on [cadencemarseille/rust-pcre](https://github.com/cadencemarseille/rust-pcre) forked by [omakoto](https://github.com/omakoto/rust-pcre) - - Original souce codes are distributed under MIT/Apache-2.0 license - * ./src/list/ranges.rs - One of the module used in `cut` command of [uutils/coreutils](https://github.com/uutils/coreutils) - Original souce codes are distributed under MIT license diff --git a/benches/cmdbench.rs b/benches/cmdbench.rs index 55841cb..8260aa8 100644 --- a/benches/cmdbench.rs +++ b/benches/cmdbench.rs @@ -44,11 +44,11 @@ fn standard_regex_double(lap: usize) { let _ = child.wait_with_output(); } -fn pcre_double(lap: usize) { +fn onig_double(lap: usize) { let mut child = Command::new(CMD) .stdin(Stdio::piped()) .stdout(Stdio::null()) // comment out to check output. - .args(&["-P", "\\d+", "sed", "s/./@/"]) + .args(&["-R", "\\d+", "sed", "s/./@/"]) .spawn() .expect("Failed to swapn process"); { @@ -193,8 +193,8 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("standard_regex_double 10000", |b| { b.iter(|| standard_regex_double(black_box(10000))) }); - c.bench_function("pcre_double 10000", |b| { - b.iter(|| pcre_double(black_box(10000))) + c.bench_function("onig_double 10000", |b| { + b.iter(|| onig_double(black_box(10000))) }); c.bench_function("field_double 10000", |b| { b.iter(|| field_double(black_box(10000))) diff --git a/completion/zsh/_teip b/completion/zsh/_teip index 66b6b9e..86b43a5 100755 --- a/completion/zsh/_teip +++ b/completion/zsh/_teip @@ -6,7 +6,7 @@ _teip () { _arguments "(: -)--help[Display this help and exit]" \ "(: -)--version[Show version and exit]" \ "($opts_omit -r -f -d -D -c)-r[Select strings matched by given regular expression ]:pattern:" \ - "($opts_omit -P -r -f -d -D -c)-r[EXPERIMENTAL: Same as -r but use Perl-compatible regular expressions (PCREs)]:pattern:" \ + "($opts_omit -R -r -f -d -D -c)-R[EXPERIMENTAL: Same as -r but use Oniguruma regular expressions]:pattern:" \ 
"($opts_omit -f -r -c)-f[Select only these white-space separated fields]:list:" \ "($opts_omit -d -D -r -c)-d[Use for field delimiter of -f]:delimiter:" \ "($opts_omit -D -d -r -c)-D[Use regular expression for field delimiter of -f]:pattern:" \ diff --git a/man/man.md b/man/man.md index b85b77b..d70a8ab 100644 --- a/man/man.md +++ b/man/man.md @@ -13,7 +13,7 @@ teip - Highly efficient "Masking tape" for standard input SYNOPSIS -------- -`teip` (-r <*pattern*> | -P <*pattern*>) [-svz] [--] [<*command*>...] +`teip` (-r <*pattern*> | -R <*pattern*>) [-svz] [--] [<*command*>...] `teip` -f <*list*> [-d <*delimiter*> | -D <*pattern*>] [-svz] [--] [<*command*>...] @@ -36,9 +36,9 @@ OPTIONS `-r` <*pattern*> Select strings matched by a regular expression <*pattern*> -`-P` <*pattern*> - Same as -r but use Perl-compatible regular expressions (PCREs) - **This feature is experimental and might be abolished in the future** +`-R` <*pattern*> + Same as -r but use Oniguruma regular expressions + **This feature might be abolished in the future because it is experimental** `-f` <*list*> Select only these white-space separated fields diff --git a/man/teip.1 b/man/teip.1 index cdf2bab..0ec9995 100644 --- a/man/teip.1 +++ b/man/teip.1 @@ -4,7 +4,7 @@ teip \- Highly efficient "Masking tape" for standard input .SH SYNOPSIS .PP -\fB\fCteip\fR (\-r <\fIpattern\fP> | \-P <\fIpattern\fP>) [\-svz] [\-\-] [<\fIcommand\fP>...] +\fB\fCteip\fR (\-r <\fIpattern\fP> | \-R <\fIpattern\fP>) [\-svz] [\-\-] [<\fIcommand\fP>...] .PP \fB\fCteip\fR \-f <\fIlist\fP> [\-d <\fIdelimiter\fP> | \-D <\fIpattern\fP>] [\-svz] [\-\-] [<\fIcommand\fP>...] 
.PP @@ -25,8 +25,9 @@ Show version and exit \fB\fC\-r\fR <\fIpattern\fP> Select strings matched by a regular expression <\fIpattern\fP> .TP -\fB\fC\-P\fR <\fIpattern\fP> -EXPERIMENTAL: Same as \-r but use Perl\-compatible regular expressions (PCREs) +\fB\fC\-R\fR <\fIpattern\fP> +Same as \-r but use Oniguruma regular expressions +\fBThis feature might be abolished in the future because it is experimental\fP .TP \fB\fC\-f\fR <\fIlist\fP> Select only these white\-space separated fields diff --git a/src/main.rs b/src/main.rs index c047b6b..3e73c27 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,8 +18,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd}; use std::process::{Command, Stdio}; use std::sync::mpsc::{self, Sender}; use std::thread::{self, JoinHandle}; -use enum_set::{self, EnumSet}; -use pcre::Pcre; +use onig::{self}; use token::Token; const CMD: &'static str = env!("CARGO_PKG_NAME"); // "teip" @@ -314,7 +313,7 @@ lazy_static! { Only a selected part of standard input is passed to any command for execution. Usage: - {cmd} (-r <pattern> | -P <pattern>) [-svz] [--] [<command>...] + {cmd} (-r <pattern> | -R <pattern>) [-svz] [--] [<command>...] {cmd} -f <list> [-d <delimiter> | -D <pattern>] [-svz] [--] [<command>...] {cmd} -c <list> [-svz] [--] [<command>...] 
{cmd} --help | --version @@ -323,7 +322,7 @@ Options: --help Display this help and exit --version Show version and exit -r <pattern> Select strings matched by given regular expression <pattern> - -P <pattern> EXPERIMENTAL: Same as -r but use Perl-compatible regular expressions (PCREs) + -R <pattern> EXPERIMENTAL: Same as -r but use Oniguruma regular expressions -f <list> Select only these white-space separated fields -d <delimiter> Use <delimiter> for field delimiter of -f -D <pattern> Use regular expression <pattern> for field delimiter of -f @@ -360,28 +359,29 @@ fn main() { let mut line_end = b'\n'; let mut regex_mode = String::new(); - let mut pcre_options: EnumSet<pcre::CompileOption> = EnumSet::new(); - pcre_options.insert(pcre::CompileOption::Ucp); let flag_zero = args.get_bool("-z"); if flag_zero { regex_mode = "(?ms)".to_string(); line_end = b'\0'; - pcre_options.insert(pcre::CompileOption::Multiline); } let cmds = args.get_vec("<command>"); let flag_regex = args.get_bool("-r"); - let flag_pcre = args.get_bool("-P"); + let flag_onig = args.get_bool("-R"); let mut regex = Regex::new("").unwrap(); - if ! flag_pcre { + if ! 
flag_onig { regex = Regex::new(&(regex_mode.to_string() + args.get_str("-r"))) .unwrap_or_else(|e| error_exit(&e.to_string())); } - let regex_pcre = match Pcre::compile_with_options(&args.get_str("-P"), &pcre_options) { - Ok(re) => re, - Err(e) => error_exit(&e.to_string()), - }; + let regex_onig: onig::Regex; + if flag_zero { + regex_onig = onig::Regex::with_options(&args.get_str("-R"), onig::RegexOptions::REGEX_OPTION_MULTILINE, onig::Syntax::default()) + .unwrap_or_else(|e| error_exit(&e.to_string())); + } else { + regex_onig = onig::Regex::with_options(&args.get_str("-R"), onig::RegexOptions::REGEX_OPTION_NONE, onig::Syntax::default()) + .unwrap_or_else(|e| error_exit(&e.to_string())); + } let flag_invert = args.get_bool("-v"); let flag_char = args.get_bool("-c"); @@ -444,8 +444,8 @@ fn main() { if flag_regex { regex_proc(&mut ch, &buf, &regex, flag_invert) .unwrap_or_else(|e| error_exit(&e.to_string())); - } else if flag_pcre { - regex_pcre_proc(&mut ch, &buf, &regex_pcre, flag_invert) + } else if flag_onig { + regex_onig_proc(&mut ch, &buf, &regex_onig, flag_invert) .unwrap_or_else(|e| error_exit(&e.to_string())); } else if flag_char { char_proc(&mut ch, &buf, &char_list) @@ -465,21 +465,20 @@ fn main() { } } -/// Handles regex pcre ( -r -P ) -fn regex_pcre_proc( +/// Handles regex onig ( -r -R ) +fn regex_onig_proc( ch: &mut PipeIntercepter, line: &Vec<u8>, - re: &Pcre, + re: &onig::Regex, invert: bool, ) -> Result<(), errors::TokenSendError> { let line = String::from_utf8_lossy(&line).to_string(); let mut left_index = 0; let mut right_index; - let iter = re.matches(&line); - for (_, cap) in iter.enumerate() { - right_index = cap.group_start(0); + for cap in re.find_iter(&line) { + right_index = cap.0; let unmatched = &line[left_index..right_index]; - let matched = &line[cap.group_start(0)..cap.group_end(0)]; + let matched = &line[cap.0..cap.1]; // Ignore empty string. 
// Regex "*" matches empty, but , in most situations, // handling empty string is not helpful for users. @@ -495,7 +494,7 @@ fn regex_pcre_proc( } else { ch.send_msg(matched.to_string())?; } - left_index = cap.group_end(0); + left_index = cap.1; } if left_index < line.len() { let unmatched = &line[left_index..line.len()]; diff --git a/tests/lib.rs b/tests/lib.rs index dae28c2..b8d8ae0 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -136,30 +136,30 @@ mod cmdtest { } #[test] - fn test_pcre() { + fn test_onig() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); - cmd.args(&["-P", "\\d+(?=D)", "sed", "s/./@/g"]) + cmd.args(&["-R", "\\d+(?=D)", "sed", "s/./@/g"]) .write_stdin("ABC123DEF456\n") .assert() .stdout("ABC@@@DEF456\n"); } #[test] - fn test_pcre_invert() { + fn test_onig_invert() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); - cmd.args(&["-vP", "\\d+(?=D)", "sed", "s/./@/g"]) + cmd.args(&["-vR", "\\d+(?=D)", "sed", "s/./@/g"]) .write_stdin("ABC123DEF456\n") .assert() .stdout("@@@123@@@@@@\n"); } #[test] - fn test_pcre_null() { + fn test_onig_null() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); // Use perl -0 instead of sed -z because BSD does not support it. cmd.args(&[ "-z", - "-P", + "-R", ".\\n.", "--", "perl", @@ -173,57 +173,57 @@ mod cmdtest { } #[test] - fn test_pcre_null_invert() { + fn test_onig_null_invert() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); // Use perl -0 instead of sed -z because BSD does not support it. 
- cmd.args(&["-zvP", "^...", "tr", "[:alnum:]", "@"]) + cmd.args(&["-zvR", "^...", "tr", "[:alnum:]", "@"]) .write_stdin("ABC123EFG\0HIJKLM456") .assert() .stdout("ABC@@@@@@\0HIJ@@@@@@"); } #[test] - fn test_pcre_multiple() { + fn test_onig_multiple() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); - cmd.args(&["-P", "C\\K\\d+(?=D)", "sed", "s/./@/g"]) + cmd.args(&["-R", "C\\K\\d+(?=D)", "sed", "s/./@/g"]) .write_stdin("ABC123DEF456\nEFG123ABC456DEF\n") .assert() .stdout("ABC@@@DEF456\nEFG123ABC@@@DEF\n"); } #[test] - fn test_solid_pcre() { + fn test_solid_onig() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); - cmd.args(&["-s", "-P", "2", "sed", "s/./A/"]) + cmd.args(&["-s", "-R", "2", "sed", "s/./A/"]) .write_stdin("118\n119\n120\n121\n") .assert() .stdout("118\n119\n1A0\n1A1\n"); } #[test] - fn test_solid_pcre_invert() { + fn test_solid_onig_invert() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); - cmd.args(&["-s", "-P", "\\d+", "-v", "tr", "[:upper:]", "[:lower:]"]) + cmd.args(&["-s", "-R", "\\d+", "-v", "tr", "[:upper:]", "[:lower:]"]) .write_stdin("ABC123EFG\nHIJKLM456") .assert() .stdout("abc123efg\nhijklm456"); } #[test] - fn test_solid_pcre_null_invert() { + fn test_solid_onig_null_invert() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); - cmd.args(&["-svP", "\\d+", "tr", "[:upper:]", "[:lower:]"]) + cmd.args(&["-svR", "\\d+", "tr", "[:upper:]", "[:lower:]"]) .write_stdin("ABC123EFG\0\nHIJKLM456") .assert() .stdout("abc123efg\0\nhijklm456"); } #[test] - fn test_solid_pcre_null() { + fn test_solid_onig_null() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); cmd.args(&[ "-sz", - "-P", + "-R", ".\\n.", "--", "perl", @@ -236,9 +236,9 @@ mod cmdtest { } #[test] - fn test_solid_pcre_null2() { + fn test_solid_onig_null2() { let mut cmd = assert_cmd::Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); 
- cmd.args(&["-sz", "-P", "(..\\n..|F.G)", "--", "tr", "-dc", "."]) + cmd.args(&["-sz", "-R", "(..\\n..|F.G)", "--", "tr", "-dc", "."]) .write_stdin("ABC\nDEF\0GHI\nJKL") .assert() .stdout("AF\0GL");