Skip to content

Commit

Permalink
update: bump polars to 0.44.2
Browse files Browse the repository at this point in the history
  • Loading branch information
dwpeng committed Nov 7, 2024
1 parent 47d922e commit 845a952
Show file tree
Hide file tree
Showing 22 changed files with 1,982 additions and 231 deletions.
2,089 changes: 1,919 additions & 170 deletions Cargo.lock

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ homepage = "https://github.com/dwpeng/filterx"
exclude = [
"spec/",
"docs/",
"python/"
]

[dependencies]
anyhow = "1.0.89"
clap = { version = "4.5.17", features = ["derive"] }
polars = { version = "0.43.1", default-features = false, features = [
polars = { version = "0.44.2", default-features = false, features = [
"lazy",
"csv",
"strings",
Expand All @@ -31,10 +30,12 @@ polars = { version = "0.43.1", default-features = false, features = [
"dtype-i16",
"dtype-i8",
"abs",
"decompress"
"decompress",
"bitwise",
"describe",
] }
rustpython-parser = "0.4.0"
thiserror = "1.0.63"
thiserror = "2.0.0"
flate2 = { version = "1.0.34", features = ["zlib-rs"] }
regex = "1.11.1"
colored = "2.1.0"
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl<'a> Eval<'a> for ast::StmtAssign {
let new_col = eval!(vm, target, Call);

let new_col = match new_col {
Value::Column(col) => col.col_name,
Value::Item(col) => col.col_name,
Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
4 changes: 2 additions & 2 deletions src/engine/eval/call/builtin/col.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ pub fn col(vm: &mut Vm, args: &Vec<ast::Expr>) -> FilterxResult<value::Value> {
}
value::Value::Str(s) => s,
value::Value::Name(c) => c.name,
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
_ => {
let h = &mut vm.hint;
h.white("col only support column index, column name, or function which return a column name.").print_and_exit();
}
};

Ok(value::Value::Column(value::Column {
Ok(value::Value::Item(value::Item {
col_name: c,
data_type: None,
}))
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/dup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub fn dup<'a>(
let col = eval!(vm, arg, Name, Call);
let col = match col {
value::Value::Name(c) => c.name,
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
_ => {
let h = &mut vm.hint;
h.white("dup only support column name").print_and_exit();
Expand Down
9 changes: 6 additions & 3 deletions src/engine/eval/call/builtin/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use polars::prelude::*;

use polars::prelude::col;

fn compute_gc(s: Series) -> PolarsResult<Option<Series>> {
fn compute_gc(s: Column) -> PolarsResult<Option<Column>> {
if !s.dtype().is_string() {
return Err(PolarsError::InvalidOperation(
format!(
Expand All @@ -15,6 +15,9 @@ fn compute_gc(s: Series) -> PolarsResult<Option<Series>> {
.into(),
));
}

let s = s.as_series().unwrap();

let v = s
.iter()
.map(|seq| {
Expand All @@ -32,7 +35,7 @@ fn compute_gc(s: Series) -> PolarsResult<Option<Series>> {
return gc_base as f32 / seq.len() as f32;
})
.collect::<Vec<f32>>();
Ok(Some(Series::new("gc".into(), v)))
Ok(Some(Column::new("gc".into(), v)))
}

pub fn gc<'a>(vm: &'a mut Vm, args: &Vec<ast::Expr>) -> FilterxResult<value::Value> {
Expand All @@ -45,7 +48,7 @@ pub fn gc<'a>(vm: &'a mut Vm, args: &Vec<ast::Expr>) -> FilterxResult<value::Val
}
let col_name = eval!(vm, &args[0], Name, Call);
let col_name = match col_name {
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub fn lower<'a>(

let col_name = eval!(vm, &args[0], Name, Call);
let col_name = match col_name {
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
4 changes: 2 additions & 2 deletions src/engine/eval/call/builtin/rename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub fn rename(vm: &mut Vm, args: &Vec<ast::Expr>) -> FilterxResult<value::Value>
}
}
value::Value::Str(s) => s,
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(c) => c.name,
_ => {
let h = &mut vm.hint;
Expand All @@ -41,7 +41,7 @@ pub fn rename(vm: &mut Vm, args: &Vec<ast::Expr>) -> FilterxResult<value::Value>

let new_col = match new_col_value {
value::Value::Str(s) => s,
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn replace<'a>(

let col_name = eval!(vm, &args[0], Name, Call);
let col_name = match col_name {
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/rev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub fn rev<'a>(
Call
);
let col_name = match col_name {
value::Value::Column(c) => c.col_name.to_string(),
value::Value::Item(c) => c.col_name.to_string(),
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
9 changes: 4 additions & 5 deletions src/engine/eval/call/builtin/revcomp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ use polars::error::PolarsResult;
use polars::prelude::col;
use polars::prelude::ChunkApply;
use polars::prelude::GetOutput;
use polars::series::IntoSeries;
use polars::series::Series;
use polars::prelude::*;

fn compute_revcomp(s: Series) -> PolarsResult<Option<Series>> {
fn compute_revcomp(s: Column) -> PolarsResult<Option<Column>> {
let ca = s.str()?;
let ca = ca.apply_values(|s| {
let s = s.chars().rev().collect::<String>();
Expand All @@ -30,7 +29,7 @@ fn compute_revcomp(s: Series) -> PolarsResult<Option<Series>> {
.collect::<String>();
Cow::Owned(s)
});
Ok(Some(ca.into_series()))
Ok(Some(ca.into_column()))
}

pub fn revcomp<'a>(
Expand All @@ -49,7 +48,7 @@ pub fn revcomp<'a>(

let col_name = eval!(vm, &args[0], Name, Call);
let col_name = match col_name {
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub fn select<'a>(vm: &'a mut Vm, args: &Vec<ast::Expr>) -> FilterxResult<value:
for arg in args {
let col = eval!(vm, arg, Name, Call);
let col = match col {
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(c) => c.name,
_ => {
let h = &mut vm.hint;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ pub fn slice<'a>(
let col_name = eval!(vm, &args[0], Name, Call, UnaryOp);
let col_name = match col_name {
value::Value::Name(n) => n.name,
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
_ => {
let h = &mut vm.hint;
h.white("slice: expected a column name as first argument")
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub fn sort(vm: &mut Vm, args: &Vec<ast::Expr>, incr: bool) -> FilterxResult<val
}
}
value::Value::Str(s) => s,
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(c) => c.name,
_ => {
let h = &mut vm.hint;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/strip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub fn strip<'a>(
);

let col_name = match col_name {
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
2 changes: 1 addition & 1 deletion src/engine/eval/call/builtin/upper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn upper<'a>(
let col_name = eval!(vm, &args[0], Name, Call, UnaryOp);

let col_name = match col_name {
value::Value::Column(c) => c.col_name,
value::Value::Item(c) => c.col_name,
value::Value::Name(n) => n.name,
_ => {
let h = &mut vm.hint;
Expand Down
11 changes: 5 additions & 6 deletions src/engine/eval/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ impl<'a> Eval<'a> for ast::ExprUnaryOp {
let r = unary(v, self.op)?;
return Ok(r);
}
Value::Name(_) | Value::Column(_) => {
println!("{:?}", -v.expr()?);
Value::Name(_) | Value::Item(_) => {
return Ok(Value::Expr(-(v.expr()?)));
}
Value::Expr(e) => {
Expand Down Expand Up @@ -363,7 +362,7 @@ fn str_in_col<'a>(vm: &'a mut Vm, left: Value, right: Value, op: &CmpOp) -> Filt
_ => unreachable!(),
};
let right_col = match &right {
Value::Column(c) => c.col_name.clone(),
Value::Item(c) => c.col_name.clone(),
_ => unreachable!(),
};

Expand Down Expand Up @@ -436,7 +435,7 @@ fn compare_in_and_not_in_dataframe<'a>(
}
}
}
DataFrame::new(vec![Series::new(right_col.as_str().into(), v)])?
DataFrame::new(vec![Column::new(right_col.as_str().into(), v)])?
}
DataType::Int32 | DataType::Int64 => {
let mut v = Vec::new();
Expand All @@ -451,7 +450,7 @@ fn compare_in_and_not_in_dataframe<'a>(
}
}
}
DataFrame::new(vec![Series::new(right_col.as_str().into(), v)])?
DataFrame::new(vec![Column::new(right_col.as_str().into(), v)])?
}
DataType::String => {
let mut v = Vec::new();
Expand All @@ -467,7 +466,7 @@ fn compare_in_and_not_in_dataframe<'a>(
}
}
}
DataFrame::new(vec![Series::new(right_col.as_str().into(), v)])?
DataFrame::new(vec![Column::new(right_col.as_str().into(), v)])?
}
_ => {
return Err(FilterxError::RuntimeError(
Expand Down
22 changes: 11 additions & 11 deletions src/engine/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub enum Value {
Str(String),
Name(Name),
List(Vec<Value>),
Column(Column),
Item(Item),
Ident((String, Box<Value>)),
AttrMethod(AttrMethod),
File(File),
Expand Down Expand Up @@ -117,7 +117,7 @@ impl Value {
Value::Float(f) => f.lit(),
Value::Int(i) => i.lit(),
Value::Str(s) => s.clone().lit(),
Value::Column(c) => col(c.col_name.clone()),
Value::Item(c) => col(c.col_name.clone()),
Value::Name(n) => col(n.name.clone()),
Value::Expr(e) => e.clone(),
Value::Null => Expr::Literal(LiteralValue::Null),
Expand All @@ -130,7 +130,7 @@ impl Value {
pub fn text(&self) -> FilterxResult<String> {
match self {
Value::Str(s) => Ok(s.to_owned()),
Value::Column(c) => Ok(c.col_name.to_owned()),
Value::Item(c) => Ok(c.col_name.to_owned()),
Value::Name(n) => Ok(n.name.to_owned()),
_ => {
return Err(FilterxError::RuntimeError(
Expand Down Expand Up @@ -160,7 +160,7 @@ impl Value {

pub fn is_column(&self) -> bool {
match self {
Value::Column(_) => true,
Value::Item(_) => true,
Value::Name(_) => true,
_ => false,
}
Expand Down Expand Up @@ -212,14 +212,14 @@ impl Default for Slice {
}

#[derive(Debug, Clone, PartialEq)]
pub struct Column {
pub struct Item {
pub col_name: String,
pub data_type: Option<DataType>,
}

impl Column {
impl Item {
pub fn new(col_name: String) -> Self {
Column {
Item {
col_name,
data_type: None,
}
Expand All @@ -230,9 +230,9 @@ impl Column {
}
}

impl Default for Column {
impl Default for Item {
fn default() -> Self {
Column {
Item {
col_name: String::new(),
data_type: None,
}
Expand All @@ -241,7 +241,7 @@ impl Default for Column {

#[derive(Debug, PartialEq, Clone)]
pub struct AttrMethod {
pub col: Column,
pub col: Item,
pub method: String,
pub value: Vec<Value>,
}
Expand Down Expand Up @@ -324,7 +324,7 @@ impl Value {
s
}
Value::File(f) => f.file_name.clone(),
Value::Column(c) => c.col_name.clone(),
Value::Item(c) => c.col_name.clone(),
Value::Ident(i) => {
let mut s = String::from("(");
s.push_str(&i.0);
Expand Down
12 changes: 6 additions & 6 deletions src/source/block/fastx/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,11 +314,11 @@ impl<'a> TableLike<'a> for Fasta {
}
let mut cols = Vec::with_capacity(3);
cols.append(&mut vec![
polars::prelude::Series::new("name".into(), names),
polars::prelude::Series::new("seq".into(), sequences),
polars::prelude::Column::new("name".into(), names),
polars::prelude::Column::new("seq".into(), sequences),
]);
if !comments.is_empty() {
cols.push(polars::prelude::Series::new("comment".into(), comments));
cols.push(polars::prelude::Column::new("comment".into(), comments));
}
let df = polars::prelude::DataFrame::new(cols)?;

Expand All @@ -344,11 +344,11 @@ impl<'a> TableLike<'a> for Fasta {
}
}
let mut cols = vec![
polars::prelude::Series::new("name".into(), headers),
polars::prelude::Series::new("seq".into(), sequences),
polars::prelude::Column::new("name".into(), headers),
polars::prelude::Column::new("seq".into(), sequences),
];
if comments.len() > 0 {
cols.push(polars::prelude::Series::new("comment".into(), comments));
cols.push(polars::prelude::Column::new("comment".into(), comments));
}
let df = polars::prelude::DataFrame::new(cols)?;
Ok(df)
Expand Down
Loading

0 comments on commit 845a952

Please sign in to comment.