Skip to content

Commit

Permalink
Merge pull request #5 from greymd/feature/pcre2
Browse files Browse the repository at this point in the history
Support Oniguruma regular expression
  • Loading branch information
greymd authored Jun 7, 2020
2 parents 0c34616 + 783e369 commit 4d751f5
Show file tree
Hide file tree
Showing 11 changed files with 76 additions and 83 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ jobs:
include:
- name: regex_match
cmd: teip -r '.*sshd' < test_secure
- name: pcre_match
cmd: teip -P '.*sshd' < test_secure
- name: onig_match
cmd: teip -R '.*sshd' < test_secure
- name: convert_date
cmd: teip -c1-15 -- date -f- +%s < test_secure
steps:
Expand All @@ -48,7 +48,7 @@ jobs:
- name: Build
run: |
rustup target add x86_64-unknown-linux-musl
CC="musl-gcc -static" cargo build --verbose --release --target x86_64-unknown-linux-musl
CFLAGS="-fPIE" CC="musl-gcc -static" cargo build --verbose --release --target x86_64-unknown-linux-musl
mv target/x86_64-unknown-linux-musl/release/teip target/release
- name: Prepare benchmarking
Expand Down
10 changes: 3 additions & 7 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,13 @@ jobs:
- name: Install musl-gcc if required
if: contains(matrix.target, 'musl')
run: |
sudo apt-get install musl-tools autoconf autotools-dev
- name: Install buildtools if required
if: contains(matrix.target, 'apple')
run: |
brew install pcre pkg-config
sudo apt-get install musl-tools
- name: Compile for musl
if: contains(matrix.target, 'musl')
run: |
rustup target add ${{ matrix.target }}
CC="musl-gcc -static" cargo build --verbose --release --target ${{ matrix.target }}
CFLAGS="-fPIE" CC="musl-gcc -static" cargo build --verbose --release --target ${{ matrix.target }}
- name: Compile
if: "! contains(matrix.target, 'musl')"
Expand All @@ -63,6 +58,7 @@ jobs:
mv target/${{ matrix.target }}/release/teip package/bin
mkdir -p package/man
cp man/teip.1 package/man
cp -r completion package/
## sed -i is not used due to difference between macOS and Linux
perl -i -pe s/___VERSION___/${{ steps.vars.outputs.ver }}/ ./package/.tar2package.yml
tar zcvf "$_TAR" -C "$PWD/package" bin man .tar2package.yml
Expand Down
8 changes: 2 additions & 6 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,12 @@ jobs:
- name: Install musl-gcc if required
if: contains(matrix.target, 'musl')
run: |
sudo apt-get install musl-tools autoconf autotools-dev
- name: Install buildtools if required
if: contains(matrix.target, 'apple')
run: |
brew install pcre pkg-config
sudo apt-get install musl-tools
- name: Build the release target
run: |
rustup target add ${{ matrix.target }}
if [[ ${{ matrix.target }} =~ "musl" ]] ;then
CC="musl-gcc -static" cargo build --verbose --release --target ${{ matrix.target }}
CFLAGS="-fPIE" CC="musl-gcc -static" cargo build --release --verbose --target x86_64-unknown-linux-musl
else
cargo build --verbose --release --target ${{ matrix.target }}
fi
Expand Down
8 changes: 2 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "teip"
version = "1.1.0"
version = "1.1.2-beta"
authors = ["Yasuhiro Yamada <[email protected]>"]
description = "Highly efficient \"Masking tape\" for standard input"
keywords = ["sed", "awk", "util"]
Expand All @@ -18,11 +18,7 @@ regex = "1"
log = { version = "0.4", features = ["max_level_trace", "release_max_level_warn"] }
env_logger = "0.7.1"
lazy_static = "1.4.0"
atty = "0.2.14"
enum-set = ">= 0.0.5"

[target.'cfg(unix)'.dependencies]
pcre = { git = "https://github.com/greymd/rust-pcre", branch = "unix" }
onig = "6"

[dev-dependencies]
assert_cmd = "1.0.1"
Expand Down
17 changes: 11 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ $ sudo dpkg -i ./teip*.deb
<!-- deb_x86_64_end -->
<!-- deb_x86_64_sha256 -->SHA256: 077683f9ac3cb84d712216dec3aba03f9b04c12c700cbffea2b38fbaff849872

## With dnf (For CentOS, RHEL users)

<!-- rpm_x86_64_start -->
```bash
$ sudo dnf install https://github.com/greymd/teip/releases/download/v1.1.0/teip-1.1.0.x86_64-unknown-linux-musl.rpm
```
<!-- rpm_x86_64_end -->
<!-- rpm_x86_64_sha256 -->SHA256: d00399fbd0fdd338bbd5fabfb53fc855fd686752dab945c224e766e944e0abff

## With yum (For CentOS7, RHEL7, Amazon Linux 2 users)

<!-- rpm_x86_64_start -->
Expand All @@ -93,7 +102,7 @@ Unfortunately, `teip` does not work on Windows due to technical reason.

```
Usage:
teip (-r <pattern> | -P <pattern>) [-svz] [--] [<command>...]
teip (-r <pattern> | -R <pattern>) [-svz] [--] [<command>...]
teip -f <list> [-d <delimiter> | -D <pattern>] [-svz] [--] [<command>...]
teip -c <list> [-svz] [--] [<command>...]
teip --help | --version
Expand All @@ -102,7 +111,7 @@ Options:
--help Display this help and exit
--version Show version and exit
-r <pattern> Select strings matched by given regular expression <pattern>
-P <pattern> EXPERIMENTAL: Same as -r but use Perl-compatible regular expressions (PCREs)
-R <pattern> EXPERIMENTAL: Same as -r but use Oniguruma regular expressions
-f <list> Select only these white-space separated fields
-d <delimiter> Use <delimiter> for field delimiter of -f
-D <pattern> Use regular expression <pattern> for field delimiter of -f
Expand Down Expand Up @@ -636,10 +645,6 @@ See this [post](https://dev.to/greymd/teip-masking-tape-for-shell-is-what-we-nee

Thank you so much for helpful modules!

* pcre crate
- Based on [cadencemarseille/rust-pcre](https://github.com/cadencemarseille/rust-pcre) forked by [omakoto](https://github.com/omakoto/rust-pcre)
- Original souce codes are distributed under MIT/Apache-2.0 license

* ./src/list/ranges.rs
- One of the module used in `cut` command of [uutils/coreutils](https://github.com/uutils/coreutils)
- Original source code is distributed under the MIT license
Expand Down
8 changes: 4 additions & 4 deletions benches/cmdbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ fn standard_regex_double(lap: usize) {
let _ = child.wait_with_output();
}

fn pcre_double(lap: usize) {
fn onig_double(lap: usize) {
let mut child = Command::new(CMD)
.stdin(Stdio::piped())
.stdout(Stdio::null()) // comment out to check output.
.args(&["-P", "\\d+", "sed", "s/./@/"])
.args(&["-R", "\\d+", "sed", "s/./@/"])
.spawn()
.expect("Failed to swapn process");
{
Expand Down Expand Up @@ -193,8 +193,8 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("standard_regex_double 10000", |b| {
b.iter(|| standard_regex_double(black_box(10000)))
});
c.bench_function("pcre_double 10000", |b| {
b.iter(|| pcre_double(black_box(10000)))
c.bench_function("onig_double 10000", |b| {
b.iter(|| onig_double(black_box(10000)))
});
c.bench_function("field_double 10000", |b| {
b.iter(|| field_double(black_box(10000)))
Expand Down
2 changes: 1 addition & 1 deletion completion/zsh/_teip
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ _teip () {
_arguments "(: -)--help[Display this help and exit]" \
"(: -)--version[Show version and exit]" \
"($opts_omit -r -f -d -D -c)-r[Select strings matched by given regular expression <pattern>]:pattern:" \
"($opts_omit -P -r -f -d -D -c)-r[EXPERIMENTAL: Same as -r but use Perl-compatible regular expressions (PCREs)]:pattern:" \
"($opts_omit -R -r -f -d -D -c)-R[EXPERIMENTAL: Same as -r but use Oniguruma regular expressions]:pattern:" \
"($opts_omit -f -r -c)-f[Select only these white-space separated fields]:list:" \
"($opts_omit -d -D -r -c)-d[Use <delimiter> for field delimiter of -f]:delimiter:" \
"($opts_omit -D -d -r -c)-D[Use regular expression <pattern> for field delimiter of -f]:pattern:" \
Expand Down
8 changes: 4 additions & 4 deletions man/man.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ teip - Highly efficient "Masking tape" for standard input
SYNOPSIS
--------

`teip` (-r <*pattern*> | -P <*pattern*>) [-svz] [--] [<*command*>...]
`teip` (-r <*pattern*> | -R <*pattern*>) [-svz] [--] [<*command*>...]

`teip` -f <*list*> [-d <*delimiter*> | -D <*pattern*>] [-svz] [--] [<*command*>...]

Expand All @@ -36,9 +36,9 @@ OPTIONS
`-r` <*pattern*>
Select strings matched by a regular expression <*pattern*>

`-P` <*pattern*>
Same as -r but use Perl-compatible regular expressions (PCREs)
**This feature is experimental and might be abolished in the future**
`-R` <*pattern*>
Same as -r but use Oniguruma regular expressions
**This feature might be abolished in the future because it is experimental**

`-f` <*list*>
Select only these white-space separated fields
Expand Down
7 changes: 4 additions & 3 deletions man/teip.1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
teip \- Highly efficient "Masking tape" for standard input
.SH SYNOPSIS
.PP
\fB\fCteip\fR (\-r <\fIpattern\fP> | \-P <\fIpattern\fP>) [\-svz] [\-\-] [<\fIcommand\fP>...]
\fB\fCteip\fR (\-r <\fIpattern\fP> | \-R <\fIpattern\fP>) [\-svz] [\-\-] [<\fIcommand\fP>...]
.PP
\fB\fCteip\fR \-f <\fIlist\fP> [\-d <\fIdelimiter\fP> | \-D <\fIpattern\fP>] [\-svz] [\-\-] [<\fIcommand\fP>...]
.PP
Expand All @@ -25,8 +25,9 @@ Show version and exit
\fB\fC\-r\fR <\fIpattern\fP>
Select strings matched by a regular expression <\fIpattern\fP>
.TP
\fB\fC\-P\fR <\fIpattern\fP>
EXPERIMENTAL: Same as \-r but use Perl\-compatible regular expressions (PCREs)
\fB\fC\-R\fR <\fIpattern\fP>
Same as \-r but use Oniguruma regular expressions
\fBThis feature might be abolished in the future because it is experimental\fP
.TP
\fB\fC\-f\fR <\fIlist\fP>
Select only these white\-space separated fields
Expand Down
45 changes: 22 additions & 23 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd};
use std::process::{Command, Stdio};
use std::sync::mpsc::{self, Sender};
use std::thread::{self, JoinHandle};
use enum_set::{self, EnumSet};
use pcre::Pcre;
use onig::{self};
use token::Token;

const CMD: &'static str = env!("CARGO_PKG_NAME"); // "teip"
Expand Down Expand Up @@ -314,7 +313,7 @@ lazy_static! {
Only a selected part of standard input is passed to any command for execution.
Usage:
{cmd} (-r <pattern> | -P <pattern>) [-svz] [--] [<command>...]
{cmd} (-r <pattern> | -R <pattern>) [-svz] [--] [<command>...]
{cmd} -f <list> [-d <delimiter> | -D <pattern>] [-svz] [--] [<command>...]
{cmd} -c <list> [-svz] [--] [<command>...]
{cmd} --help | --version
Expand All @@ -323,7 +322,7 @@ Options:
--help Display this help and exit
--version Show version and exit
-r <pattern> Select strings matched by given regular expression <pattern>
-P <pattern> EXPERIMENTAL: Same as -r but use Perl-compatible regular expressions (PCREs)
-R <pattern> EXPERIMENTAL: Same as -r but use Oniguruma regular expressions
-f <list> Select only these white-space separated fields
-d <delimiter> Use <delimiter> for field delimiter of -f
-D <pattern> Use regular expression <pattern> for field delimiter of -f
Expand Down Expand Up @@ -360,28 +359,29 @@ fn main() {

let mut line_end = b'\n';
let mut regex_mode = String::new();
let mut pcre_options: EnumSet<pcre::CompileOption> = EnumSet::new();
pcre_options.insert(pcre::CompileOption::Ucp);

let flag_zero = args.get_bool("-z");
if flag_zero {
regex_mode = "(?ms)".to_string();
line_end = b'\0';
pcre_options.insert(pcre::CompileOption::Multiline);
}
let cmds = args.get_vec("<command>");
let flag_regex = args.get_bool("-r");
let flag_pcre = args.get_bool("-P");
let flag_onig = args.get_bool("-R");
let mut regex = Regex::new("").unwrap();
if ! flag_pcre {
if ! flag_onig {
regex = Regex::new(&(regex_mode.to_string() + args.get_str("-r")))
.unwrap_or_else(|e| error_exit(&e.to_string()));
}

let regex_pcre = match Pcre::compile_with_options(&args.get_str("-P"), &pcre_options) {
Ok(re) => re,
Err(e) => error_exit(&e.to_string()),
};
let regex_onig: onig::Regex;
if flag_zero {
regex_onig = onig::Regex::with_options(&args.get_str("-R"), onig::RegexOptions::REGEX_OPTION_MULTILINE, onig::Syntax::default())
.unwrap_or_else(|e| error_exit(&e.to_string()));
} else {
regex_onig = onig::Regex::with_options(&args.get_str("-R"), onig::RegexOptions::REGEX_OPTION_NONE, onig::Syntax::default())
.unwrap_or_else(|e| error_exit(&e.to_string()));
}

let flag_invert = args.get_bool("-v");
let flag_char = args.get_bool("-c");
Expand Down Expand Up @@ -444,8 +444,8 @@ fn main() {
if flag_regex {
regex_proc(&mut ch, &buf, &regex, flag_invert)
.unwrap_or_else(|e| error_exit(&e.to_string()));
} else if flag_pcre {
regex_pcre_proc(&mut ch, &buf, &regex_pcre, flag_invert)
} else if flag_onig {
regex_onig_proc(&mut ch, &buf, &regex_onig, flag_invert)
.unwrap_or_else(|e| error_exit(&e.to_string()));
} else if flag_char {
char_proc(&mut ch, &buf, &char_list)
Expand All @@ -465,21 +465,20 @@ fn main() {
}
}

/// Handles regex pcre ( -r -P )
fn regex_pcre_proc(
/// Handles regex onig ( -r -R )
fn regex_onig_proc(
ch: &mut PipeIntercepter,
line: &Vec<u8>,
re: &Pcre,
re: &onig::Regex,
invert: bool,
) -> Result<(), errors::TokenSendError> {
let line = String::from_utf8_lossy(&line).to_string();
let mut left_index = 0;
let mut right_index;
let iter = re.matches(&line);
for (_, cap) in iter.enumerate() {
right_index = cap.group_start(0);
for cap in re.find_iter(&line) {
right_index = cap.0;
let unmatched = &line[left_index..right_index];
let matched = &line[cap.group_start(0)..cap.group_end(0)];
let matched = &line[cap.0..cap.1];
// Ignore empty string.
// Regex "*" matches empty, but, in most situations,
// handling empty string is not helpful for users.
Expand All @@ -495,7 +494,7 @@ fn regex_pcre_proc(
} else {
ch.send_msg(matched.to_string())?;
}
left_index = cap.group_end(0);
left_index = cap.1;
}
if left_index < line.len() {
let unmatched = &line[left_index..line.len()];
Expand Down
Loading

0 comments on commit 4d751f5

Please sign in to comment.