Skip to content

Commit

Permalink
fix: improve separator detection and error handling in CSV processing
Browse files (browse the repository at this point in the history)
  • Loading branch information
dwpeng committed Jan 8, 2025
1 parent 69ea98c commit 314ced3
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
11 changes: 8 additions & 3 deletions src/filterx/src/files/csv.rs
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
use crate::args::{CsvCommand, ShareArgs};
use filterx_core::{util, writer::FilterxWriter, FilterxResult};
use filterx_engine::vm::Vm;
use filterx_source::{detect_columns, DataframeSource, Source, SourceType};

use filterx_core::{util, writer::FilterxWriter, FilterxResult};

pub fn filterx_csv(cmd: CsvCommand) -> FilterxResult<()> {
let CsvCommand {
share_args:
Expand All @@ -25,7 +24,13 @@ pub fn filterx_csv(cmd: CsvCommand) -> FilterxResult<()> {
} = cmd;
let separator = match separator {
Some(s) => Some(s),
None => util::detect_separator(path.as_str(), 20)?,
None => match util::detect_separator(path.as_str(), 20)? {
Some(s) => Some(s),
None => {
eprintln!("Cannot detect separator, parse as one column.");
None
}
},
};
let output_separator;
if _output_separator.is_none() {
Expand Down
9 changes: 4 additions & 5 deletions src/filterx_core/src/util.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,8 @@ use polars::{
};

use crate::{
sep::Separator, thread_size::ThreadSize, writer::FilterxWriter, FilterxError, FilterxResult,
reader::FilterxReader, sep::Separator, thread_size::ThreadSize, writer::FilterxWriter,
FilterxError, FilterxResult,
};
use std::io::Write;
use std::num::NonZero;
Expand Down Expand Up @@ -146,10 +147,9 @@ pub fn write_df(
}

pub fn collect_comment_lines(path: &str, comment_prefix: &str) -> FilterxResult<Vec<String>> {
use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;
let file = File::open(path)?;
let file = FilterxReader::new(path)?;
let mut reader = BufReader::new(file);
let mut line = String::new();
let mut comment_lines = Vec::new();
Expand All @@ -166,10 +166,9 @@ pub fn collect_comment_lines(path: &str, comment_prefix: &str) -> FilterxResult<
}

pub fn detect_separator(path: &str, nline: usize) -> FilterxResult<Option<String>> {
use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;
let file = File::open(path)?;
let file = FilterxReader::new(path)?;
let mut reader = BufReader::new(file);
let mut line = String::new();
let mut lines = Vec::with_capacity(nline);
Expand Down

0 comments on commit 314ced3

Please sign in to comment.