From 888143931278f165d3456d4be3d66471fe2a0ca4 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Mon, 21 Nov 2022 11:32:59 +0000 Subject: [PATCH 01/10] Fix #245: return error for NaN in naive bayes --- src/lib.rs | 1 - src/naive_bayes/gaussian.rs | 21 +++++++++++++++++++++ src/naive_bayes/mod.rs | 18 +++++++++++++++--- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 03bfc03b..b8fe5b08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,6 @@ clippy::upper_case_acronyms )] #![warn(missing_docs)] -#![warn(rustdoc::missing_doc_code_examples)] //! # smartcore //! diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs index a9c1d4fe..8a0a8e19 100644 --- a/src/naive_bayes/gaussian.rs +++ b/src/naive_bayes/gaussian.rs @@ -425,6 +425,27 @@ mod tests { ); } + #[test] + fn run_gaussian_naive_bayes_with_few_samples() { + let x = DenseMatrix::::from_2d_array(&[ + &[-1., -1.], + &[-2., -1.], + &[-3., -2.], + &[1., 1.], + ]); + let y: Vec = vec![1, 1, 1, 2]; + + let gnb = GaussianNB::fit(&x, &y, Default::default()); + + match gnb.unwrap().predict(&x) { + Ok(_) => assert!(false, "test should return Failed"), + Err(err) => { + assert!(err.to_string() == "Can't find solution: log_likelihood for distribution of one of the rows is NaN"); + assert!(true) + }, + } + } + #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index e7ab7f6d..5ce6af2f 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -35,7 +35,7 @@ //! //! //! -use crate::error::Failed; +use crate::error::{Failed, FailedError}; use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1}; use crate::numbers::basenum::Number; #[cfg(feature = "serde")] @@ -93,6 +93,7 @@ impl, Y: Array1, D: NBDistribution Result { let y_classes = self.distribution.classes(); let (rows, _) = x.shape(); + let mut log_likehood_is_nan = false; let predictions = (0..rows) .map(|row_index| { let row = x.get_row(row_index); @@ -100,17 +101,28 @@ impl, Y: Array1, D: NBDistribution>(); + if log_likehood_is_nan { + return Err(Failed::because( + FailedError::SolutionFailed, + "log_likelihood for distribution of one of the rows is NaN", + )); + } let y_hat = Y::from_vec_slice(&predictions); Ok(y_hat) } From 22c9640e5a0abb304ae5fce5ff050082b210d7c1 Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Mon, 21 Nov 2022 11:36:45 +0000 Subject: [PATCH 02/10] fmt --- src/naive_bayes/gaussian.rs | 12 ++++-------- src/naive_bayes/mod.rs | 8 ++++---- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs index 8a0a8e19..be581829 100644 --- a/src/naive_bayes/gaussian.rs +++ b/src/naive_bayes/gaussian.rs @@ -427,12 +427,8 @@ mod tests { #[test] fn run_gaussian_naive_bayes_with_few_samples() { - let x = DenseMatrix::::from_2d_array(&[ - &[-1., -1.], - &[-2., -1.], - &[-3., -2.], - &[1., 1.], - ]); + let x = + DenseMatrix::::from_2d_array(&[&[-1., -1.], &[-2., -1.], &[-3., -2.], &[1., 1.]]); let y: Vec = vec![1, 1, 1, 2]; let gnb = GaussianNB::fit(&x, &y, Default::default()); @@ -440,9 +436,9 @@ mod tests { match gnb.unwrap().predict(&x) { Ok(_) => assert!(false, "test should return Failed"), Err(err) => { - assert!(err.to_string() == "Can't find solution: log_likelihood for distribution of one of the rows is NaN"); + assert!(err.to_string() == "Can't find solution: log_likelihood for distribution of one of the rows is NaN"); 
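                // A note on why this input must fail (a sketch of the mechanism,
                // inferred from the test data): class `2` has a single training
                // sample, so its per-feature variance is zero and the Gaussian
                // log-likelihood degenerates to NaN, which `predict` now surfaces
                // as a `SolutionFailed` error.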
assert!(true) - }, + } } } diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index 5ce6af2f..a476e8d2 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -101,17 +101,17 @@ impl, Y: Array1, D: NBDistribution Date: Wed, 22 Jan 2025 13:18:12 +0000 Subject: [PATCH 03/10] Implement error handling for NaN values in NBayes predict: * general behaviour has been kept unchanged according to original tests in `mod.rs` * aka: error is returned only if all the predicted probabilities are NaN --- .github/CONTRIBUTING.md | 2 + .github/workflows/ci.yml | 2 +- CHANGELOG.md | 6 + src/algorithm/neighbour/bbd_tree.rs | 7 +- src/algorithm/neighbour/cover_tree.rs | 8 +- src/algorithm/neighbour/fastpair.rs | 32 +- src/algorithm/neighbour/linear_search.rs | 4 +- src/algorithm/sort/heap_select.rs | 4 +- src/algorithm/sort/quick_sort.rs | 1 + src/cluster/dbscan.rs | 9 +- src/cluster/kmeans.rs | 20 +- src/dataset/diabetes.rs | 2 +- src/dataset/digits.rs | 8 +- src/dataset/iris.rs | 4 +- src/decomposition/pca.rs | 20 +- src/decomposition/svd.rs | 10 +- src/ensemble/random_forest_classifier.rs | 11 +- src/ensemble/random_forest_regressor.rs | 11 +- src/error/mod.rs | 19 + src/lib.rs | 2 +- src/linalg/basic/arrays.rs | 366 ++++++++++-------- src/linalg/basic/matrix.rs | 326 +++++++++++----- src/linalg/basic/vector.rs | 35 +- src/linalg/ndarray/matrix.rs | 16 +- src/linalg/ndarray/vector.rs | 12 +- src/linalg/traits/cholesky.rs | 18 +- src/linalg/traits/evd.rs | 24 +- src/linalg/traits/high_order.rs | 6 +- src/linalg/traits/lu.rs | 15 +- src/linalg/traits/qr.rs | 19 +- src/linalg/traits/stats.rs | 31 +- src/linalg/traits/svd.rs | 34 +- src/linear/bg_solver.rs | 16 +- src/linear/elastic_net.rs | 10 +- src/linear/lasso.rs | 3 +- src/linear/lasso_optimizer.rs | 14 +- src/linear/linear_regression.rs | 7 +- src/linear/logistic_regression.rs | 153 +++++--- src/linear/ridge_regression.rs | 7 +- src/metrics/distance/mahalanobis.rs | 5 +- src/metrics/mod.rs | 2 +- .../hyper_tuning/grid_search.rs | 4 +- src/model_selection/kfold.rs | 8 +- src/model_selection/mod.rs | 16 +- src/naive_bayes/bernoulli.rs | 22 +- src/naive_bayes/categorical.rs | 18 +- src/naive_bayes/gaussian.rs | 33 +- src/naive_bayes/mod.rs | 126 ++++-- src/naive_bayes/multinomial.rs | 22 +- src/neighbors/knn_classifier.rs | 17 +- src/neighbors/knn_regressor.rs | 24 +- .../first_order/gradient_descent.rs | 17 +- src/optimization/first_order/lbfgs.rs | 39 +- src/optimization/first_order/mod.rs | 16 +- src/optimization/line_search.rs | 29 +- src/optimization/mod.rs | 16 +- src/preprocessing/categorical.rs | 19 +- src/preprocessing/numerical.rs | 105 ++--- src/readers/csv.rs | 19 +- src/svm/mod.rs | 4 +- src/svm/svc.rs | 142 ++++--- src/svm/svr.rs | 30 +- src/tree/decision_tree_classifier.rs | 251 ++++++++++-- src/tree/decision_tree_regressor.rs | 26 +- 64 files changed, 1409 insertions(+), 895 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 15b39063..895db0f5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -37,6 +37,8 @@ $ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213 ``` * find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This need a compiled binary so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file. 
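A minimal scratch binary for this can look like the sketch below (assuming only that `smartcore` is already declared as a dependency of the scratch crate):

```rust
// Scratch `main.rs`: pull a little of smartcore into the compiled binary,
// build with `cargo build --release`, then point twiggy at the artifact.
use smartcore::linalg::basic::matrix::DenseMatrix;

fn main() {
    let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.]]).unwrap();
    println!("{x}");
}
```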
+* Please take a look to the output of a profiler to spot most evident performance problems, see [this guide about using a profiler](http://www.codeofview.com/fix-rs/2017/01/24/how-to-optimize-rust-programs-on-linux/). + ## Issue Report Process 1. Go to the project's issues. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 89b3b37e..d7942c8f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: - name: Install Rust toolchain uses: actions-rs/toolchain@v1 with: - toolchain: stable + toolchain: 1.81 # 1.82 seems to break wasm32 tests https://github.com/rustwasm/wasm-bindgen/issues/4274 target: ${{ matrix.platform.target }} profile: minimal default: true diff --git a/CHANGELOG.md b/CHANGELOG.md index d1054327..6df73a6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.0] - 2023-04-05 + +## Added +- WARNING: Breaking changes! +- `DenseMatrix` constructor now returns `Result` to avoid user instantiating inconsistent rows/cols count. Their return values need to be unwrapped with `unwrap()`, see tests + ## [0.3.0] - 2022-11-09 ## Added diff --git a/src/algorithm/neighbour/bbd_tree.rs b/src/algorithm/neighbour/bbd_tree.rs index 44cef506..790f6d39 100644 --- a/src/algorithm/neighbour/bbd_tree.rs +++ b/src/algorithm/neighbour/bbd_tree.rs @@ -40,11 +40,11 @@ impl BBDTreeNode { impl BBDTree { pub fn new>(data: &M) -> BBDTree { - let nodes = Vec::new(); + let nodes: Vec = Vec::new(); let (n, _) = data.shape(); - let index = (0..n).collect::>(); + let index = (0..n).collect::>(); let mut tree = BBDTree { nodes, @@ -343,7 +343,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let tree = BBDTree::new(&data); diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index 011a9cc0..9989ae24 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -124,7 +124,7 @@ impl> CoverTree { current_cover_set.push((d, &self.root)); let mut heap = HeapSelection::with_capacity(k); - heap.add(std::f64::MAX); + heap.add(f64::MAX); let mut empty_heap = true; if !self.identical_excluded || self.get_data_value(self.root.idx) != p { @@ -145,7 +145,7 @@ impl> CoverTree { } let upper_bound = if empty_heap { - std::f64::INFINITY + f64::INFINITY } else { *heap.peek() }; @@ -291,7 +291,7 @@ impl> CoverTree { } else { let max_dist = self.max(point_set); let next_scale = (max_scale - 1).min(self.get_scale(max_dist)); - if next_scale == std::i64::MIN { + if next_scale == i64::MIN { let mut children: Vec = Vec::new(); let mut leaf = self.new_leaf(p); children.push(leaf); @@ -435,7 +435,7 @@ impl> CoverTree { fn get_scale(&self, d: f64) -> i64 { if d == 0f64 { - std::i64::MIN + i64::MIN } else { (self.inv_log_base * d.ln()).ceil() as i64 } diff --git a/src/algorithm/neighbour/fastpair.rs b/src/algorithm/neighbour/fastpair.rs index eca73ed6..4e99261b 100644 --- a/src/algorithm/neighbour/fastpair.rs +++ b/src/algorithm/neighbour/fastpair.rs @@ -17,7 +17,7 @@ /// &[4.6, 3.1, 1.5, 0.2], /// &[5.0, 3.6, 1.4, 0.2], /// &[5.4, 3.9, 1.7, 0.4], -/// ]); +/// ]).unwrap(); /// let fastpair = FastPair::new(&x); /// let closest_pair: PairwiseDistance = fastpair.unwrap().closest_pair(); /// ``` @@ 
-52,10 +52,8 @@ pub struct FastPair<'a, T: RealNumber + FloatNumber, M: Array2> { } impl<'a, T: RealNumber + FloatNumber, M: Array2> FastPair<'a, T, M> { - /// /// Constructor - /// Instantiate and inizialise the algorithm - /// + /// Instantiate and initialize the algorithm pub fn new(m: &'a M) -> Result { if m.shape().0 < 3 { return Err(Failed::because( @@ -74,10 +72,8 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2> FastPair<'a, T, M> { Ok(init) } - /// /// Initialise `FastPair` by passing a `Array2`. /// Build a FastPairs data-structure from a set of (new) points. - /// fn init(&mut self) { // basic measures let len = self.samples.shape().0; @@ -158,9 +154,7 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2> FastPair<'a, T, M> { self.neighbours = neighbours; } - /// /// Find closest pair by scanning list of nearest neighbors. - /// #[allow(dead_code)] pub fn closest_pair(&self) -> PairwiseDistance { let mut a = self.neighbours[0]; // Start with first point @@ -217,10 +211,10 @@ mod tests_fastpair { use super::*; use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix}; - /// /// Brute force algorithm, used only for comparison and testing - /// - pub fn closest_pair_brute(fastpair: &FastPair>) -> PairwiseDistance { + pub fn closest_pair_brute( + fastpair: &FastPair<'_, f64, DenseMatrix>, + ) -> PairwiseDistance { use itertools::Itertools; let m = fastpair.samples.shape().0; @@ -271,7 +265,7 @@ mod tests_fastpair { fn dataset_has_at_least_three_points() { // Create a dataset which consists of only two points: // A(0.0, 0.0) and B(1.0, 1.0). - let dataset = DenseMatrix::::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]); + let dataset = DenseMatrix::::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]).unwrap(); // We expect an error when we run `FastPair` on this dataset, // becuase `FastPair` currently only works on a minimum of 3 @@ -288,7 +282,7 @@ mod tests_fastpair { #[test] fn one_dimensional_dataset_minimal() { - let dataset = DenseMatrix::::from_2d_array(&[&[0.0], &[2.0], &[9.0]]); + let dataset = DenseMatrix::::from_2d_array(&[&[0.0], &[2.0], &[9.0]]).unwrap(); let result = FastPair::new(&dataset); assert!(result.is_ok()); @@ -308,7 +302,8 @@ mod tests_fastpair { #[test] fn one_dimensional_dataset_2() { - let dataset = DenseMatrix::::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]); + let dataset = + DenseMatrix::::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]).unwrap(); let result = FastPair::new(&dataset); assert!(result.is_ok()); @@ -343,7 +338,8 @@ mod tests_fastpair { &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], - ]); + ]) + .unwrap(); let fastpair = FastPair::new(&x); assert!(fastpair.is_ok()); @@ -516,7 +512,8 @@ mod tests_fastpair { &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], - ]); + ]) + .unwrap(); // compute let fastpair = FastPair::new(&x); assert!(fastpair.is_ok()); @@ -564,7 +561,8 @@ mod tests_fastpair { &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], - ]); + ]) + .unwrap(); // compute let fastpair = FastPair::new(&x); assert!(fastpair.is_ok()); diff --git a/src/algorithm/neighbour/linear_search.rs b/src/algorithm/neighbour/linear_search.rs index b1ce7270..6bc82176 100644 --- a/src/algorithm/neighbour/linear_search.rs +++ b/src/algorithm/neighbour/linear_search.rs @@ -61,7 +61,7 @@ impl> LinearKNNSearch { for _ in 0..k { heap.add(KNNPoint { - distance: std::f64::INFINITY, + distance: f64::INFINITY, index: None, }); } @@ -215,7 +215,7 @@ mod tests { }; let point_inf = KNNPoint { - distance: 
std::f64::INFINITY, + distance: f64::INFINITY, index: Some(3), }; diff --git a/src/algorithm/sort/heap_select.rs b/src/algorithm/sort/heap_select.rs index 23d2704a..8a4ef78a 100644 --- a/src/algorithm/sort/heap_select.rs +++ b/src/algorithm/sort/heap_select.rs @@ -133,7 +133,7 @@ mod tests { #[test] fn test_add1() { let mut heap = HeapSelection::with_capacity(3); - heap.add(std::f64::INFINITY); + heap.add(f64::INFINITY); heap.add(-5f64); heap.add(4f64); heap.add(-1f64); @@ -151,7 +151,7 @@ mod tests { #[test] fn test_add2() { let mut heap = HeapSelection::with_capacity(3); - heap.add(std::f64::INFINITY); + heap.add(f64::INFINITY); heap.add(0.0); heap.add(8.4852); heap.add(5.6568); diff --git a/src/algorithm/sort/quick_sort.rs b/src/algorithm/sort/quick_sort.rs index 97d34e7c..e64c4243 100644 --- a/src/algorithm/sort/quick_sort.rs +++ b/src/algorithm/sort/quick_sort.rs @@ -3,6 +3,7 @@ use num_traits::Num; pub trait QuickArgSort { fn quick_argsort_mut(&mut self) -> Vec; + #[allow(dead_code)] fn quick_argsort(&self) -> Vec; } diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs index 0d84a613..2e2aac10 100644 --- a/src/cluster/dbscan.rs +++ b/src/cluster/dbscan.rs @@ -315,8 +315,7 @@ impl, Y: Array1, D: Distance>> } } - while !neighbors.is_empty() { - let neighbor = neighbors.pop().unwrap(); + while let Some(neighbor) = neighbors.pop() { let index = neighbor.0; if y[index] == outlier { @@ -443,7 +442,8 @@ mod tests { &[2.2, 1.2], &[1.8, 0.8], &[3.0, 5.0], - ]); + ]) + .unwrap(); let expected_labels = vec![1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0]; @@ -488,7 +488,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let dbscan = DBSCAN::fit(&x, Default::default()).unwrap(); diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index c2470abb..2fade68f 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -41,7 +41,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! //! let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap(); // Fit to data, 2 clusters //! let y_hat: Vec = kmeans.predict(&x).unwrap(); // use the same points for prediction @@ -96,7 +96,7 @@ impl, Y: Array1> PartialEq for KMeans< return false; } for j in 0..self.centroids[i].len() { - if (self.centroids[i][j] - other.centroids[i][j]).abs() > std::f64::EPSILON { + if (self.centroids[i][j] - other.centroids[i][j]).abs() > f64::EPSILON { return false; } } @@ -249,7 +249,7 @@ impl, Y: Array1> Predictor impl, Y: Array1> KMeans { /// Fit algorithm to _NxM_ matrix where _N_ is number of samples and _M_ is number of features. 
- /// * `data` - training instances to cluster + /// * `data` - training instances to cluster /// * `parameters` - cluster parameters pub fn fit(data: &X, parameters: KMeansParameters) -> Result, Failed> { let bbd = BBDTree::new(data); @@ -270,7 +270,7 @@ impl, Y: Array1> KMeans let (n, d) = data.shape(); - let mut distortion = std::f64::MAX; + let mut distortion = f64::MAX; let mut y = KMeans::::kmeans_plus_plus(data, parameters.k, parameters.seed); let mut size = vec![0; parameters.k]; let mut centroids = vec![vec![0f64; d]; parameters.k]; @@ -331,7 +331,7 @@ impl, Y: Array1> KMeans let mut row = vec![0f64; x.shape().1]; for i in 0..n { - let mut min_dist = std::f64::MAX; + let mut min_dist = f64::MAX; let mut best_cluster = 0; for j in 0..self.k { @@ -361,7 +361,7 @@ impl, Y: Array1> KMeans .cloned() .collect(); - let mut d = vec![std::f64::MAX; n]; + let mut d = vec![f64::MAX; n]; let mut row = vec![TX::zero(); data.shape().1]; for j in 1..k { @@ -424,7 +424,7 @@ mod tests { )] #[test] fn invalid_k() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert!(KMeans::, Vec>::fit( &x, @@ -492,7 +492,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let kmeans = KMeans::fit(&x, Default::default()).unwrap(); @@ -531,7 +532,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let kmeans: KMeans, Vec> = KMeans::fit(&x, Default::default()).unwrap(); diff --git a/src/dataset/diabetes.rs b/src/dataset/diabetes.rs index faf169eb..a95b5116 100644 --- a/src/dataset/diabetes.rs +++ b/src/dataset/diabetes.rs @@ -40,7 +40,7 @@ pub fn load_dataset() -> Dataset { target: y, num_samples, num_features, - feature_names: vec![ + feature_names: [ "Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6", ] .iter() diff --git a/src/dataset/digits.rs b/src/dataset/digits.rs index b3556e53..c32648cd 100644 --- a/src/dataset/digits.rs +++ b/src/dataset/digits.rs @@ -25,16 +25,14 @@ pub fn load_dataset() -> Dataset { target: y, num_samples, num_features, - feature_names: vec![ - "sepal length (cm)", + feature_names: ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", - "petal width (cm)", - ] + "petal width (cm)"] .iter() .map(|s| s.to_string()) .collect(), - target_names: vec!["setosa", "versicolor", "virginica"] + target_names: ["setosa", "versicolor", "virginica"] .iter() .map(|s| s.to_string()) .collect(), diff --git a/src/dataset/iris.rs b/src/dataset/iris.rs index fe60241a..75c58acc 100644 --- a/src/dataset/iris.rs +++ b/src/dataset/iris.rs @@ -36,7 +36,7 @@ pub fn load_dataset() -> Dataset { target: y, num_samples, num_features, - feature_names: vec![ + feature_names: [ "sepal length (cm)", "sepal width (cm)", "petal length (cm)", @@ -45,7 +45,7 @@ pub fn load_dataset() -> Dataset { .iter() .map(|s| s.to_string()) .collect(), - target_names: vec!["setosa", "versicolor", "virginica"] + target_names: ["setosa", "versicolor", "virginica"] .iter() .map(|s| s.to_string()) .collect(), diff --git a/src/decomposition/pca.rs b/src/decomposition/pca.rs index d4116ed5..11853648 100644 --- a/src/decomposition/pca.rs +++ b/src/decomposition/pca.rs @@ -35,7 +35,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! //! let pca = PCA::fit(&iris, PCAParameters::default().with_n_components(2)).unwrap(); // Reduce number of features to 2 //! 
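//! // Project the data onto the two retained components (a sketch of the
//! // usual follow-up step; `transform` returns a `Result` like `fit`):
//! let iris_reduced = pca.transform(&iris).unwrap();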
@@ -443,6 +443,7 @@ mod tests { &[2.6, 53.0, 66.0, 10.8], &[6.8, 161.0, 60.0, 15.6], ]) + .unwrap() } #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), @@ -457,7 +458,8 @@ mod tests { &[0.9952, 0.0588], &[0.0463, 0.9769], &[0.0752, 0.2007], - ]); + ]) + .unwrap(); let pca = PCA::fit(&us_arrests, Default::default()).unwrap(); @@ -500,7 +502,8 @@ mod tests { -0.974080592182491, 0.0723250196376097, ], - ]); + ]) + .unwrap(); let expected_projection = DenseMatrix::from_2d_array(&[ &[-64.8022, -11.448, 2.4949, -2.4079], @@ -553,7 +556,8 @@ mod tests { &[91.5446, -22.9529, 0.402, -0.7369], &[118.1763, 5.5076, 2.7113, -0.205], &[10.4345, -5.9245, 3.7944, 0.5179], - ]); + ]) + .unwrap(); let expected_eigenvalues: Vec = vec![ 343544.6277001563, @@ -616,7 +620,8 @@ mod tests { -0.0881962972508558, -0.0096011588898465, ], - ]); + ]) + .unwrap(); let expected_projection = DenseMatrix::from_2d_array(&[ &[0.9856, -1.1334, 0.4443, -0.1563], @@ -669,7 +674,8 @@ mod tests { &[-2.1086, -1.4248, -0.1048, -0.1319], &[-2.0797, 0.6113, 0.1389, -0.1841], &[-0.6294, -0.321, 0.2407, 0.1667], - ]); + ]) + .unwrap(); let expected_eigenvalues: Vec = vec![ 2.480241579149493, @@ -732,7 +738,7 @@ mod tests { // &[4.9, 2.4, 3.3, 1.0], // &[6.6, 2.9, 4.6, 1.3], // &[5.2, 2.7, 3.9, 1.4], - // ]); + // ]).unwrap(); // let pca = PCA::fit(&iris, Default::default()).unwrap(); diff --git a/src/decomposition/svd.rs b/src/decomposition/svd.rs index a82dfbd0..259bfbc0 100644 --- a/src/decomposition/svd.rs +++ b/src/decomposition/svd.rs @@ -32,7 +32,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! //! let svd = SVD::fit(&iris, SVDParameters::default(). //! with_n_components(2)).unwrap(); // Reduce number of features to 2 @@ -292,7 +292,8 @@ mod tests { &[5.7, 81.0, 39.0, 9.3], &[2.6, 53.0, 66.0, 10.8], &[6.8, 161.0, 60.0, 15.6], - ]); + ]) + .unwrap(); let expected = DenseMatrix::from_2d_array(&[ &[243.54655757, -18.76673788], @@ -300,7 +301,8 @@ mod tests { &[305.93972467, -15.39087376], &[197.28420365, -11.66808306], &[293.43187394, 1.91163633], - ]); + ]) + .unwrap(); let svd = SVD::fit(&x, Default::default()).unwrap(); let x_transformed = svd.transform(&x).unwrap(); @@ -341,7 +343,7 @@ mod tests { // &[4.9, 2.4, 3.3, 1.0], // &[6.6, 2.9, 4.6, 1.3], // &[5.2, 2.7, 3.9, 1.4], - // ]); + // ]).unwrap(); // let svd = SVD::fit(&iris, Default::default()).unwrap(); diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index 6448b52e..dabb2480 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -33,7 +33,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, //! 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -660,7 +660,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let classifier = RandomForestClassifier::fit( @@ -733,7 +734,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let classifier = RandomForestClassifier::fit( @@ -786,7 +788,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs index 926327e1..efc63d3d 100644 --- a/src/ensemble/random_forest_regressor.rs +++ b/src/ensemble/random_forest_regressor.rs @@ -29,7 +29,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! let y = vec![ //! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, //! 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9 @@ -574,7 +574,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -648,7 +649,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -702,7 +704,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, diff --git a/src/error/mod.rs b/src/error/mod.rs index 838df085..b6b1d982 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -32,6 +32,8 @@ pub enum FailedError { SolutionFailed, /// Error in input parameters ParametersError, + /// Invalid state error (should never happen) + InvalidStateError, } impl Failed { @@ -64,6 +66,22 @@ impl Failed { } } + /// new instance of `FailedError::ParametersError` + pub fn input(msg: &str) -> Self { + Failed { + err: FailedError::ParametersError, + msg: msg.to_string(), + } + } + + /// new instance of `FailedError::InvalidStateError` + pub fn invalid_state(msg: &str) -> Self { + Failed { + err: FailedError::InvalidStateError, + msg: msg.to_string(), + } + } + /// new instance of `err` pub fn because(err: FailedError, msg: &str) -> Self { Failed { @@ -97,6 +115,7 @@ impl fmt::Display for FailedError { FailedError::DecompositionFailed => "Decomposition failed", FailedError::SolutionFailed => "Can't find solution", FailedError::ParametersError => "Error in input, check parameters", + FailedError::InvalidStateError => "Invalid state, this should never happen", // useful in development phase of lib }; write!(f, "{failed_err_str}") } diff --git a/src/lib.rs 
b/src/lib.rs index 9c331dce..c6f9349c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,7 +63,7 @@ //! &[3., 4.], //! &[5., 6.], //! &[7., 8.], -//! &[9., 10.]]); +//! &[9., 10.]]).unwrap(); //! // Our classes are defined as a vector //! let y = vec![2, 2, 2, 3, 3]; //! diff --git a/src/linalg/basic/arrays.rs b/src/linalg/basic/arrays.rs index e11bb6cd..3c889722 100644 --- a/src/linalg/basic/arrays.rs +++ b/src/linalg/basic/arrays.rs @@ -188,8 +188,7 @@ pub trait ArrayView1: Array { _ => max, } }; - self.iterator(0) - .fold(T::min_value(), |max, x| max_f(max, x)) + self.iterator(0).fold(T::min_value(), max_f) } /// return min value from the view fn min(&self) -> T @@ -202,8 +201,7 @@ pub trait ArrayView1: Array { _ => min, } }; - self.iterator(0) - .fold(T::max_value(), |max, x| min_f(max, x)) + self.iterator(0).fold(T::max_value(), min_f) } /// return the position of the max value of the view fn argmax(&self) -> usize @@ -267,11 +265,11 @@ pub trait ArrayView1: Array { if p.is_infinite() && p.is_sign_positive() { self.iterator(0) .map(|x| x.to_f64().unwrap().abs()) - .fold(std::f64::NEG_INFINITY, |a, b| a.max(b)) + .fold(f64::NEG_INFINITY, |a, b| a.max(b)) } else if p.is_infinite() && p.is_sign_negative() { self.iterator(0) .map(|x| x.to_f64().unwrap().abs()) - .fold(std::f64::INFINITY, |a, b| a.min(b)) + .fold(f64::INFINITY, |a, b| a.min(b)) } else { let mut norm = 0f64; @@ -560,11 +558,11 @@ pub trait ArrayView2: Array: pub trait MutArrayView2: MutArray + ArrayView2 { - /// + /// copy values from another array fn copy_from(&mut self, other: &dyn Array) { self.iterator_mut(0) .zip(other.iterator(0)) .for_each(|(s, o)| *s = *o); } - /// + /// update view with absolute values fn abs_mut(&mut self) where T: Number + Signed, { self.iterator_mut(0).for_each(|v| *v = v.abs()); } - /// + /// update view values with opposite sign fn neg_mut(&mut self) where T: Number + Neg, { self.iterator_mut(0).for_each(|v| *v = -*v); } - /// + /// update view values at power `p` fn pow_mut(&mut self, p: T) where T: RealNumber, { self.iterator_mut(0).for_each(|v| *v = v.powf(p)); } - /// + /// scale view values fn scale_mut(&mut self, mean: &[T], std: &[T], axis: u8) where T: Number, @@ -786,27 +784,27 @@ pub trait MutArrayView2: /// Trait for mutable 1D-array view pub trait Array1: MutArrayView1 + Sized + Clone { - /// + /// return a view of the array fn slice<'a>(&'a self, range: Range) -> Box + 'a>; - /// + /// return a mutable view of the array fn slice_mut<'a>(&'a mut self, range: Range) -> Box + 'a>; - /// + /// fill array with a given value fn fill(len: usize, value: T) -> Self where Self: Sized; - /// + /// create array from iterator fn from_iterator>(iter: I, len: usize) -> Self where Self: Sized; - /// + /// create array from vector fn from_vec_slice(slice: &[T]) -> Self where Self: Sized; - /// + /// create array from slice fn from_slice(slice: &'_ dyn ArrayView1) -> Self where Self: Sized; - /// + /// create a zero array fn zeros(len: usize) -> Self where T: Number, @@ -814,7 +812,7 @@ pub trait Array1: MutArrayView1 + Sized + { Self::fill(len, T::zero()) } - /// + /// create an array of ones fn ones(len: usize) -> Self where T: Number, @@ -822,7 +820,7 @@ pub trait Array1: MutArrayView1 + Sized + { Self::fill(len, T::one()) } - /// + /// create an array of random values fn rand(len: usize) -> Self where T: RealNumber, @@ -830,7 +828,7 @@ pub trait Array1: MutArrayView1 + Sized + { Self::from_iterator((0..len).map(|_| T::rand()), len) } - /// + /// add a scalar to the array fn add_scalar(&self, x: T) 
-> Self where T: Number, @@ -840,7 +838,7 @@ pub trait Array1: MutArrayView1 + Sized + result.add_scalar_mut(x); result } - /// + /// subtract a scalar from the array fn sub_scalar(&self, x: T) -> Self where T: Number, @@ -850,7 +848,7 @@ pub trait Array1: MutArrayView1 + Sized + result.sub_scalar_mut(x); result } - /// + /// divide a scalar from the array fn div_scalar(&self, x: T) -> Self where T: Number, @@ -860,7 +858,7 @@ pub trait Array1: MutArrayView1 + Sized + result.div_scalar_mut(x); result } - /// + /// multiply a scalar to the array fn mul_scalar(&self, x: T) -> Self where T: Number, @@ -870,7 +868,7 @@ pub trait Array1: MutArrayView1 + Sized + result.mul_scalar_mut(x); result } - /// + /// sum of two arrays fn add(&self, other: &dyn Array) -> Self where T: Number, @@ -880,7 +878,7 @@ pub trait Array1: MutArrayView1 + Sized + result.add_mut(other); result } - /// + /// subtract two arrays fn sub(&self, other: &impl Array1) -> Self where T: Number, @@ -890,7 +888,7 @@ pub trait Array1: MutArrayView1 + Sized + result.sub_mut(other); result } - /// + /// multiply two arrays fn mul(&self, other: &dyn Array) -> Self where T: Number, @@ -900,7 +898,7 @@ pub trait Array1: MutArrayView1 + Sized + result.mul_mut(other); result } - /// + /// divide two arrays fn div(&self, other: &dyn Array) -> Self where T: Number, @@ -910,7 +908,7 @@ pub trait Array1: MutArrayView1 + Sized + result.div_mut(other); result } - /// + /// replace values with another array fn take(&self, index: &[usize]) -> Self where Self: Sized, @@ -922,7 +920,7 @@ pub trait Array1: MutArrayView1 + Sized + ); Self::from_iterator(index.iter().map(move |&i| *self.get(i)), index.len()) } - /// + /// create a view of the array with absolute values fn abs(&self) -> Self where T: Number + Signed, @@ -932,7 +930,7 @@ pub trait Array1: MutArrayView1 + Sized + result.abs_mut(); result } - /// + /// create a view of the array with opposite sign fn neg(&self) -> Self where T: Number + Neg, @@ -942,7 +940,7 @@ pub trait Array1: MutArrayView1 + Sized + result.neg_mut(); result } - /// + /// create a view of the array with values at power `p` fn pow(&self, p: T) -> Self where T: RealNumber, @@ -952,7 +950,7 @@ pub trait Array1: MutArrayView1 + Sized + result.pow_mut(p); result } - /// + /// apply argsort to the array fn argsort(&self) -> Vec where T: Number + PartialOrd, @@ -960,12 +958,12 @@ pub trait Array1: MutArrayView1 + Sized + let mut v = self.clone(); v.argsort_mut() } - /// + /// map values of the array fn map, F: FnMut(&T) -> O>(self, f: F) -> A { let len = self.shape(); A::from_iterator(self.iterator(0).map(f), len) } - /// + /// apply softmax to the array fn softmax(&self) -> Self where T: RealNumber, @@ -975,7 +973,7 @@ pub trait Array1: MutArrayView1 + Sized + result.softmax_mut(); result } - /// + /// multiply array by matrix fn xa(&self, a_transpose: bool, a: &dyn ArrayView2) -> Self where T: Number, @@ -1005,7 +1003,7 @@ pub trait Array1: MutArrayView1 + Sized + result } - /// + /// check if two arrays are approximately equal fn approximate_eq(&self, other: &Self, error: T) -> bool where T: Number + RealNumber, @@ -1017,13 +1015,13 @@ pub trait Array1: MutArrayView1 + Sized + /// Trait for mutable 2D-array view pub trait Array2: MutArrayView2 + Sized + Clone { - /// + /// fill 2d array with a given value fn fill(nrows: usize, ncols: usize, value: T) -> Self; - /// + /// get a view of the 2d array fn slice<'a>(&'a self, rows: Range, cols: Range) -> Box + 'a> where Self: Sized; - /// + /// get a mutable view of the 2d 
array fn slice_mut<'a>( &'a mut self, rows: Range, @@ -1031,31 +1029,31 @@ pub trait Array2: MutArrayView2 + Sized + ) -> Box + 'a> where Self: Sized; - /// + /// create 2d array from iterator fn from_iterator>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self; - /// + /// get row from 2d array fn get_row<'a>(&'a self, row: usize) -> Box + 'a> where Self: Sized; - /// + /// get column from 2d array fn get_col<'a>(&'a self, col: usize) -> Box + 'a> where Self: Sized; - /// + /// create a zero 2d array fn zeros(nrows: usize, ncols: usize) -> Self where T: Number, { Self::fill(nrows, ncols, T::zero()) } - /// + /// create a 2d array of ones fn ones(nrows: usize, ncols: usize) -> Self where T: Number, { Self::fill(nrows, ncols, T::one()) } - /// + /// create an identity matrix fn eye(size: usize) -> Self where T: Number, @@ -1068,29 +1066,29 @@ pub trait Array2: MutArrayView2 + Sized + matrix } - /// + /// create a 2d array of random values fn rand(nrows: usize, ncols: usize) -> Self where T: RealNumber, { Self::from_iterator((0..nrows * ncols).map(|_| T::rand()), nrows, ncols, 0) } - /// + /// crate from 2d slice fn from_slice(slice: &dyn ArrayView2) -> Self { let (nrows, ncols) = slice.shape(); Self::from_iterator(slice.iterator(0).cloned(), nrows, ncols, 0) } - /// + /// create from row fn from_row(slice: &dyn ArrayView1) -> Self { let ncols = slice.shape(); Self::from_iterator(slice.iterator(0).cloned(), 1, ncols, 0) } - /// + /// create from column fn from_column(slice: &dyn ArrayView1) -> Self { let nrows = slice.shape(); Self::from_iterator(slice.iterator(0).cloned(), nrows, 1, 0) } - /// + /// transpose 2d array fn transpose(&self) -> Self { let (nrows, ncols) = self.shape(); let mut m = Self::fill(ncols, nrows, *self.get((0, 0))); @@ -1101,7 +1099,7 @@ pub trait Array2: MutArrayView2 + Sized + } m } - /// + /// change shape of 2d array fn reshape(&self, nrows: usize, ncols: usize, axis: u8) -> Self { let (onrows, oncols) = self.shape(); @@ -1112,7 +1110,7 @@ pub trait Array2: MutArrayView2 + Sized + Self::from_iterator(self.iterator(0).cloned(), nrows, ncols, axis) } - /// + /// multiply two 2d arrays fn matmul(&self, other: &dyn ArrayView2) -> Self where T: Number, @@ -1138,7 +1136,7 @@ pub trait Array2: MutArrayView2 + Sized + result } - /// + /// matrix multiplication fn ab(&self, a_transpose: bool, b: &dyn ArrayView2, b_transpose: bool) -> Self where T: Number, @@ -1173,7 +1171,7 @@ pub trait Array2: MutArrayView2 + Sized + result } } - /// + /// matrix vector multiplication fn ax(&self, a_transpose: bool, x: &dyn ArrayView1) -> Self where T: Number, @@ -1201,7 +1199,7 @@ pub trait Array2: MutArrayView2 + Sized + } result } - /// + /// concatenate 1d array fn concatenate_1d<'a>(arrays: &'a [&'a dyn ArrayView1], axis: u8) -> Self { assert!( axis == 1 || axis == 0, @@ -1239,7 +1237,7 @@ pub trait Array2: MutArrayView2 + Sized + ), } } - /// + /// concatenate 2d array fn concatenate_2d<'a>(arrays: &'a [&'a dyn ArrayView2], axis: u8) -> Self { assert!( axis == 1 || axis == 0, @@ -1296,7 +1294,7 @@ pub trait Array2: MutArrayView2 + Sized + } } } - /// + /// merge 1d arrays fn merge_1d<'a>(&'a self, arrays: &'a [&'a dyn ArrayView1], axis: u8, append: bool) -> Self { assert!( axis == 1 || axis == 0, @@ -1364,7 +1362,7 @@ pub trait Array2: MutArrayView2 + Sized + } } } - /// + /// Stack arrays in sequence vertically fn v_stack(&self, other: &dyn ArrayView2) -> Self { let (nrows, ncols) = self.shape(); let (other_nrows, other_ncols) = other.shape(); @@ -1380,7 +1378,7 @@ pub trait 
Array2: MutArrayView2 + Sized + 0, ) } - /// + /// Stack arrays in sequence horizontally fn h_stack(&self, other: &dyn ArrayView2) -> Self { let (nrows, ncols) = self.shape(); let (other_nrows, other_ncols) = other.shape(); @@ -1396,20 +1394,20 @@ pub trait Array2: MutArrayView2 + Sized + 1, ) } - /// + /// map array values fn map, F: FnMut(&T) -> O>(self, f: F) -> A { let (nrows, ncols) = self.shape(); A::from_iterator(self.iterator(0).map(f), nrows, ncols, 0) } - /// + /// iter rows fn row_iter<'a>(&'a self) -> Box + 'a>> + 'a> { Box::new((0..self.shape().0).map(move |r| self.get_row(r))) } - /// + /// iter cols fn col_iter<'a>(&'a self) -> Box + 'a>> + 'a> { Box::new((0..self.shape().1).map(move |r| self.get_col(r))) } - /// + /// take elements from 2d array fn take(&self, index: &[usize], axis: u8) -> Self { let (nrows, ncols) = self.shape(); @@ -1449,7 +1447,7 @@ pub trait Array2: MutArrayView2 + Sized + fn take_column(&self, column_index: usize) -> Self { self.take(&[column_index], 1) } - /// + /// add a scalar to the array fn add_scalar(&self, x: T) -> Self where T: Number, @@ -1458,7 +1456,7 @@ pub trait Array2: MutArrayView2 + Sized + result.add_scalar_mut(x); result } - /// + /// subtract a scalar from the array fn sub_scalar(&self, x: T) -> Self where T: Number, @@ -1467,7 +1465,7 @@ pub trait Array2: MutArrayView2 + Sized + result.sub_scalar_mut(x); result } - /// + /// divide a scalar from the array fn div_scalar(&self, x: T) -> Self where T: Number, @@ -1476,7 +1474,7 @@ pub trait Array2: MutArrayView2 + Sized + result.div_scalar_mut(x); result } - /// + /// multiply a scalar to the array fn mul_scalar(&self, x: T) -> Self where T: Number, @@ -1485,7 +1483,7 @@ pub trait Array2: MutArrayView2 + Sized + result.mul_scalar_mut(x); result } - /// + /// sum of two arrays fn add(&self, other: &dyn Array) -> Self where T: Number, @@ -1494,7 +1492,7 @@ pub trait Array2: MutArrayView2 + Sized + result.add_mut(other); result } - /// + /// subtract two arrays fn sub(&self, other: &dyn Array) -> Self where T: Number, @@ -1503,7 +1501,7 @@ pub trait Array2: MutArrayView2 + Sized + result.sub_mut(other); result } - /// + /// multiply two arrays fn mul(&self, other: &dyn Array) -> Self where T: Number, @@ -1512,7 +1510,7 @@ pub trait Array2: MutArrayView2 + Sized + result.mul_mut(other); result } - /// + /// divide two arrays fn div(&self, other: &dyn Array) -> Self where T: Number, @@ -1521,7 +1519,7 @@ pub trait Array2: MutArrayView2 + Sized + result.div_mut(other); result } - /// + /// absolute values of the array fn abs(&self) -> Self where T: Number + Signed, @@ -1530,7 +1528,7 @@ pub trait Array2: MutArrayView2 + Sized + result.abs_mut(); result } - /// + /// negation of the array fn neg(&self) -> Self where T: Number + Neg, @@ -1539,7 +1537,7 @@ pub trait Array2: MutArrayView2 + Sized + result.neg_mut(); result } - /// + /// values at power `p` fn pow(&self, p: T) -> Self where T: RealNumber, @@ -1570,14 +1568,14 @@ pub trait Array2: MutArrayView2 + Sized + mean } - /// copy coumn as a vector + /// copy column as a vector fn copy_col_as_vec(&self, col: usize, result: &mut Vec) { for (r, result_r) in result.iter_mut().enumerate().take(self.shape().0) { *result_r = *self.get((r, col)); } } - /// appriximate equality of the elements of a matrix according to a given error + /// approximate equality of the elements of a matrix according to a given error fn approximate_eq(&self, other: &Self, error: T) -> bool where T: Number + RealNumber, @@ -1633,8 +1631,8 @@ mod tests { let v = vec![3., 
-2., 6.]; assert_eq!(v.norm(1.), 11.); assert_eq!(v.norm(2.), 7.); - assert_eq!(v.norm(std::f64::INFINITY), 6.); - assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.); + assert_eq!(v.norm(f64::INFINITY), 6.); + assert_eq!(v.norm(f64::NEG_INFINITY), 2.); } #[test] @@ -1777,7 +1775,7 @@ mod tests { #[test] fn test_xa() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!(vec![7, 8].xa(false, &a), vec![39, 54, 69]); assert_eq!(vec![7, 8, 9].xa(true, &a), vec![50, 122]); } @@ -1785,19 +1783,27 @@ mod tests { #[test] fn test_min_max() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).max(0), + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + .unwrap() + .max(0), vec!(4, 5, 6) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).max(1), + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + .unwrap() + .max(1), vec!(3, 6) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).min(0), + DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]) + .unwrap() + .min(0), vec!(1., 2., 3.) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).min(1), + DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]) + .unwrap() + .min(1), vec!(1., 4.) ); } @@ -1805,11 +1811,15 @@ mod tests { #[test] fn test_argmax() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 5, 3], &[4, 2, 6]]).argmax(0), + DenseMatrix::from_2d_array(&[&[1, 5, 3], &[4, 2, 6]]) + .unwrap() + .argmax(0), vec!(1, 0, 1) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[4, 2, 3], &[1, 5, 6]]).argmax(1), + DenseMatrix::from_2d_array(&[&[4, 2, 3], &[1, 5, 6]]) + .unwrap() + .argmax(1), vec!(0, 2) ); } @@ -1817,168 +1827,181 @@ mod tests { #[test] fn test_sum() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).sum(0), + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + .unwrap() + .sum(0), vec!(5, 7, 9) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).sum(1), + DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]) + .unwrap() + .sum(1), vec!(6., 15.) 
); } #[test] fn test_abs() { - let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]); + let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]).unwrap(); x.abs_mut(); - assert_eq!(x, DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]])); + assert_eq!( + x, + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap() + ); } #[test] fn test_neg() { - let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]); + let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]).unwrap(); x.neg_mut(); - assert_eq!(x, DenseMatrix::from_2d_array(&[&[1, -2, 3], &[-4, 5, -6]])); + assert_eq!( + x, + DenseMatrix::from_2d_array(&[&[1, -2, 3], &[-4, 5, -6]]).unwrap() + ); } #[test] fn test_copy_from() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); let mut y = DenseMatrix::::zeros(2, 3); y.copy_from(&x); - assert_eq!(y, DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]])); + assert_eq!( + y, + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap() + ); } #[test] fn test_init() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( DenseMatrix::::zeros(2, 2), - DenseMatrix::from_2d_array(&[&[0, 0], &[0, 0]]) + DenseMatrix::from_2d_array(&[&[0, 0], &[0, 0]]).unwrap() ); assert_eq!( DenseMatrix::::ones(2, 2), - DenseMatrix::from_2d_array(&[&[1, 1], &[1, 1]]) + DenseMatrix::from_2d_array(&[&[1, 1], &[1, 1]]).unwrap() ); assert_eq!( DenseMatrix::::eye(3), - DenseMatrix::from_2d_array(&[&[1, 0, 0], &[0, 1, 0], &[0, 0, 1]]) + DenseMatrix::from_2d_array(&[&[1, 0, 0], &[0, 1, 0], &[0, 0, 1]]).unwrap() ); assert_eq!( - DenseMatrix::from_slice(x.slice(0..2, 0..2).as_ref()), - DenseMatrix::from_2d_array(&[&[1, 2], &[4, 5]]) + DenseMatrix::from_slice(x.slice(0..2, 0..2).as_ref()), // internal only? + DenseMatrix::from_2d_array(&[&[1, 2], &[4, 5]]).unwrap() ); assert_eq!( - DenseMatrix::from_row(x.get_row(0).as_ref()), - DenseMatrix::from_2d_array(&[&[1, 2, 3]]) + DenseMatrix::from_row(x.get_row(0).as_ref()), // internal only? + DenseMatrix::from_2d_array(&[&[1, 2, 3]]).unwrap() ); assert_eq!( - DenseMatrix::from_column(x.get_col(0).as_ref()), - DenseMatrix::from_2d_array(&[&[1], &[4]]) + DenseMatrix::from_column(x.get_col(0).as_ref()), // internal only? 
+ DenseMatrix::from_2d_array(&[&[1], &[4]]).unwrap() ); } #[test] fn test_transpose() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( x.transpose(), - DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]) + DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]).unwrap() ); } #[test] fn test_reshape() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( x.reshape(3, 2, 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap() ); assert_eq!( x.reshape(3, 2, 1), - DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]) + DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]).unwrap() ); } #[test] #[should_panic] fn test_failed_reshape() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( x.reshape(4, 2, 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap() ); } #[test] fn test_matmul() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); assert_eq!( a.matmul(&(*b.slice(0..3, 0..2))), - DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]) + DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]).unwrap() ); assert_eq!( a.matmul(&b), - DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]) + DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]).unwrap() ); } #[test] fn test_concat() { - let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]); - let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]).unwrap(); assert_eq!( DenseMatrix::concatenate_1d(&[&vec!(1, 2, 3), &vec!(4, 5, 6)], 0), - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap() ); assert_eq!( DenseMatrix::concatenate_1d(&[&vec!(1, 2), &vec!(3, 4)], 1), - DenseMatrix::from_2d_array(&[&[1, 3], &[2, 4]]) + DenseMatrix::from_2d_array(&[&[1, 3], &[2, 4]]).unwrap() ); assert_eq!( DenseMatrix::concatenate_2d(&[&a, &b], 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]).unwrap() ); assert_eq!( DenseMatrix::concatenate_2d(&[&a, &b], 1), - DenseMatrix::from_2d_array(&[&[1, 2, 5, 6], &[3, 4, 7, 8]]) + DenseMatrix::from_2d_array(&[&[1, 2, 5, 6], &[3, 4, 7, 8]]).unwrap() ); } #[test] fn test_take() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); assert_eq!( a.take(&[0, 2], 1), - DenseMatrix::from_2d_array(&[&[1, 3], &[4, 6]]) + DenseMatrix::from_2d_array(&[&[1, 3], &[4, 6]]).unwrap() ); assert_eq!( b.take(&[0, 2], 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[5, 6]]).unwrap() ); } #[test] fn test_merge() { - let 
a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]), + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 0, true) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8], &[1, 2], &[3, 4]]), + DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8], &[1, 2], &[3, 4]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 0, false) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 5, 7], &[3, 4, 6, 8]]), + DenseMatrix::from_2d_array(&[&[1, 2, 5, 7], &[3, 4, 6, 8]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 1, true) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[5, 7, 1, 2], &[6, 8, 3, 4]]), + DenseMatrix::from_2d_array(&[&[5, 7, 1, 2], &[6, 8, 3, 4]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 1, false) ); } @@ -1986,20 +2009,28 @@ mod tests { #[test] fn test_ops() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).mul_scalar(2), - DenseMatrix::from_2d_array(&[&[2, 4], &[6, 8]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .mul_scalar(2), + DenseMatrix::from_2d_array(&[&[2, 4], &[6, 8]]).unwrap() ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).add_scalar(2), - DenseMatrix::from_2d_array(&[&[3, 4], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .add_scalar(2), + DenseMatrix::from_2d_array(&[&[3, 4], &[5, 6]]).unwrap() ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).sub_scalar(1), - DenseMatrix::from_2d_array(&[&[0, 1], &[2, 3]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .sub_scalar(1), + DenseMatrix::from_2d_array(&[&[0, 1], &[2, 3]]).unwrap() ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).div_scalar(2), - DenseMatrix::from_2d_array(&[&[0, 1], &[1, 2]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .div_scalar(2), + DenseMatrix::from_2d_array(&[&[0, 1], &[1, 2]]).unwrap() ); } @@ -2013,42 +2044,45 @@ mod tests { #[test] fn test_vstack() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); let expected = DenseMatrix::from_2d_array(&[ &[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[1, 2, 3], &[4, 5, 6], - ]); + ]) + .unwrap(); let result = a.v_stack(&b); assert_eq!(result, expected); } #[test] fn test_hstack() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); let expected = - DenseMatrix::from_2d_array(&[&[1, 2, 3, 1, 2], &[4, 5, 6, 3, 4], &[7, 8, 9, 5, 6]]); + DenseMatrix::from_2d_array(&[&[1, 2, 3, 1, 2], &[4, 5, 6, 3, 4], &[7, 8, 9, 5, 6]]) + .unwrap(); let result = a.h_stack(&b); assert_eq!(result, expected); } #[test] fn test_map() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 
5.0, 6.0]]).unwrap(); let result: DenseMatrix = a.map(|&v| v as f64); assert_eq!(result, expected); } #[test] fn scale() { - let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]); - let expected_0 = DenseMatrix::from_2d_array(&[&[-1., -1., -1.], &[1., 1., 1.]]); - let expected_1 = DenseMatrix::from_2d_array(&[&[-1.22, 0.0, 1.22], &[-1.22, 0.0, 1.22]]); + let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).unwrap(); + let expected_0 = DenseMatrix::from_2d_array(&[&[-1., -1., -1.], &[1., 1., 1.]]).unwrap(); + let expected_1 = + DenseMatrix::from_2d_array(&[&[-1.22, 0.0, 1.22], &[-1.22, 0.0, 1.22]]).unwrap(); { let mut m = m.clone(); @@ -2062,52 +2096,52 @@ mod tests { #[test] fn test_pow_mut() { - let mut a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]]); + let mut a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]]).unwrap(); a.pow_mut(2.0); assert_eq!( a, - DenseMatrix::from_2d_array(&[&[1.0, 4.0, 9.0], &[16.0, 25.0, 36.0]]) + DenseMatrix::from_2d_array(&[&[1.0, 4.0, 9.0], &[16.0, 25.0, 36.0]]).unwrap() ); } #[test] fn test_ab() { - let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]); - let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]).unwrap(); assert_eq!( a.ab(false, &b, false), - DenseMatrix::from_2d_array(&[&[19, 22], &[43, 50]]) + DenseMatrix::from_2d_array(&[&[19, 22], &[43, 50]]).unwrap() ); assert_eq!( a.ab(true, &b, false), - DenseMatrix::from_2d_array(&[&[26, 30], &[38, 44]]) + DenseMatrix::from_2d_array(&[&[26, 30], &[38, 44]]).unwrap() ); assert_eq!( a.ab(false, &b, true), - DenseMatrix::from_2d_array(&[&[17, 23], &[39, 53]]) + DenseMatrix::from_2d_array(&[&[17, 23], &[39, 53]]).unwrap() ); assert_eq!( a.ab(true, &b, true), - DenseMatrix::from_2d_array(&[&[23, 31], &[34, 46]]) + DenseMatrix::from_2d_array(&[&[23, 31], &[34, 46]]).unwrap() ); } #[test] fn test_ax() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( a.ax(false, &vec![7, 8, 9]).transpose(), - DenseMatrix::from_2d_array(&[&[50, 122]]) + DenseMatrix::from_2d_array(&[&[50, 122]]).unwrap() ); assert_eq!( a.ax(true, &vec![7, 8]).transpose(), - DenseMatrix::from_2d_array(&[&[39, 54, 69]]) + DenseMatrix::from_2d_array(&[&[39, 54, 69]]).unwrap() ); } #[test] fn diag() { - let x = DenseMatrix::from_2d_array(&[&[0, 1, 2], &[3, 4, 5], &[6, 7, 8]]); + let x = DenseMatrix::from_2d_array(&[&[0, 1, 2], &[3, 4, 5], &[6, 7, 8]]).unwrap(); assert_eq!(x.diag(), vec![0, 4, 8]); } @@ -2119,13 +2153,15 @@ mod tests { &[68, 590, 37], &[69, 660, 46], &[73, 600, 55], - ]); + ]) + .unwrap(); let mut result = DenseMatrix::zeros(3, 3); let expected = DenseMatrix::from_2d_array(&[ &[11.5, 50.0, 34.75], &[50.0, 1250.0, 205.0], &[34.75, 205.0, 110.0], - ]); + ]) + .unwrap(); a.cov(&mut result); diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index f21e04fe..88a0849c 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -19,6 +19,8 @@ use crate::linalg::traits::svd::SVDDecomposable; use crate::numbers::basenum::Number; use crate::numbers::realnum::RealNumber; +use crate::error::Failed; + /// Dense matrix #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] @@ -50,26 +52,26 @@ pub struct DenseMatrixMutView<'a, T: Debug + Display + Copy + Sized> { } impl<'a, T: 
Debug + Display + Copy + Sized> DenseMatrixView<'a, T> { - fn new(m: &'a DenseMatrix, rows: Range, cols: Range) -> Self { - let (start, end, stride) = if m.column_major { - ( - rows.start + cols.start * m.nrows, - rows.end + (cols.end - 1) * m.nrows, - m.nrows, - ) + fn new( + m: &'a DenseMatrix, + vrows: Range, + vcols: Range, + ) -> Result { + if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) { + Err(Failed::input( + "The specified view is outside of the matrix range", + )) } else { - ( - rows.start * m.ncols + cols.start, - (rows.end - 1) * m.ncols + cols.end, - m.ncols, - ) - }; - DenseMatrixView { - values: &m.values[start..end], - stride, - nrows: rows.end - rows.start, - ncols: cols.end - cols.start, - column_major: m.column_major, + let (start, end, stride) = + m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major); + + Ok(DenseMatrixView { + values: &m.values[start..end], + stride, + nrows: vrows.end - vrows.start, + ncols: vcols.end - vcols.start, + column_major: m.column_major, + }) } } @@ -89,7 +91,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> { } } -impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a, T> { +impl fmt::Display for DenseMatrixView<'_, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, @@ -102,26 +104,26 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a, } impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> { - fn new(m: &'a mut DenseMatrix, rows: Range, cols: Range) -> Self { - let (start, end, stride) = if m.column_major { - ( - rows.start + cols.start * m.nrows, - rows.end + (cols.end - 1) * m.nrows, - m.nrows, - ) + fn new( + m: &'a mut DenseMatrix, + vrows: Range, + vcols: Range, + ) -> Result { + if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) { + Err(Failed::input( + "The specified view is outside of the matrix range", + )) } else { - ( - rows.start * m.ncols + cols.start, - (rows.end - 1) * m.ncols + cols.end, - m.ncols, - ) - }; - DenseMatrixMutView { - values: &mut m.values[start..end], - stride, - nrows: rows.end - rows.start, - ncols: cols.end - cols.start, - column_major: m.column_major, + let (start, end, stride) = + m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major); + + Ok(DenseMatrixMutView { + values: &mut m.values[start..end], + stride, + nrows: vrows.end - vrows.start, + ncols: vcols.end - vcols.start, + column_major: m.column_major, + }) } } @@ -140,7 +142,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> { } } - fn iter_mut<'b>(&'b mut self, axis: u8) -> Box + 'b> { + fn iter_mut<'b>(&'b mut self, axis: u8) -> Box + 'b> { let column_major = self.column_major; let stride = self.stride; let ptr = self.values.as_mut_ptr(); @@ -167,7 +169,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> { } } -impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'a, T> { +impl fmt::Display for DenseMatrixMutView<'_, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, @@ -182,42 +184,102 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView< impl DenseMatrix { /// Create new instance of `DenseMatrix` without copying data. /// `values` should be in column-major order. 
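    /// For example (a sketch for a 2x3 matrix): with `column_major = true` the
    /// flat vec `[1, 2, 3, 4, 5, 6]` is read one column at a time, giving the
    /// columns `[1, 2]`, `[3, 4]`, `[5, 6]`; element `(row, col)` sits at index
    /// `row + col * nrows`, matching the indexing used by `get` below.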
- pub fn new(nrows: usize, ncols: usize, values: Vec, column_major: bool) -> Self { - DenseMatrix { - ncols, - nrows, - values, - column_major, + pub fn new( + nrows: usize, + ncols: usize, + values: Vec, + column_major: bool, + ) -> Result { + let data_len = values.len(); + if nrows * ncols != values.len() { + Err(Failed::input(&format!( + "The specified shape: (cols: {ncols}, rows: {nrows}) does not align with data len: {data_len}" + ))) + } else { + Ok(DenseMatrix { + ncols, + nrows, + values, + column_major, + }) } } /// New instance of `DenseMatrix` from 2d array. - pub fn from_2d_array(values: &[&[T]]) -> Self { + pub fn from_2d_array(values: &[&[T]]) -> Result { DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect()) } /// New instance of `DenseMatrix` from 2d vector. - pub fn from_2d_vec(values: &Vec>) -> Self { - let nrows = values.len(); - let ncols = values - .first() - .unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector")) - .len(); - let mut m_values = Vec::with_capacity(nrows * ncols); - - for c in 0..ncols { - for r in values.iter().take(nrows) { - m_values.push(r[c]) + #[allow(clippy::ptr_arg)] + pub fn from_2d_vec(values: &Vec>) -> Result { + if values.is_empty() || values[0].is_empty() { + Err(Failed::input( + "The 2d vec provided is empty; cannot instantiate the matrix", + )) + } else { + let nrows = values.len(); + let ncols = values + .first() + .unwrap_or_else(|| { + panic!("Invalid state: Cannot create 2d matrix from an empty vector") + }) + .len(); + let mut m_values = Vec::with_capacity(nrows * ncols); + + for c in 0..ncols { + for r in values.iter().take(nrows) { + m_values.push(r[c]) + } } - } - DenseMatrix::new(nrows, ncols, m_values, true) + DenseMatrix::new(nrows, ncols, m_values, true) + } } /// Iterate over values of matrix pub fn iter(&self) -> Iter<'_, T> { self.values.iter() } + + /// Check whether the requested view exceeds the matrix rows/cols bounds; returns `true` when the view is out of range + fn is_valid_view( + &self, + n_rows: usize, + n_cols: usize, + vrows: &Range, + vcols: &Range, + ) -> bool { + !(vrows.end <= n_rows + && vcols.end <= n_cols + && vrows.start <= n_rows + && vcols.start <= n_cols) + } + + /// Compute the range of the requested view: start, end, size of the slice + fn stride_range( + &self, + n_rows: usize, + n_cols: usize, + vrows: &Range, + vcols: &Range, + column_major: bool, + ) -> (usize, usize, usize) { + let (start, end, stride) = if column_major { + ( + vrows.start + vcols.start * n_rows, + vrows.end + (vcols.end - 1) * n_rows, + n_rows, + ) + } else { + ( + vrows.start * n_cols + vcols.start, + (vrows.end - 1) * n_cols + vcols.end, + n_cols, + ) + }; + (start, end, stride) + } } impl fmt::Display for DenseMatrix { @@ -304,6 +366,7 @@ where impl Array for DenseMatrix { fn get(&self, pos: (usize, usize)) -> &T { let (row, col) = pos; + if row >= self.nrows || col >= self.ncols { panic!( "Invalid index ({},{}) for {}x{} matrix", @@ -383,15 +446,15 @@ impl MutArrayView2 for DenseMatrix {} impl Array2 for DenseMatrix { fn get_row<'a>(&'a self, row: usize) -> Box + 'a> { - Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols)) + Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols).unwrap()) } fn get_col<'a>(&'a self, col: usize) -> Box + 'a> { - Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1)) + Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1).unwrap()) } fn slice<'a>(&'a self, rows: Range, cols: Range) -> Box + 'a> { - Box::new(DenseMatrixView::new(self, rows, 
cols)) + Box::new(DenseMatrixView::new(self, rows, cols).unwrap()) } fn slice_mut<'a>( @@ -402,15 +465,17 @@ impl Array2 for DenseMatrix { where Self: Sized, { - Box::new(DenseMatrixMutView::new(self, rows, cols)) + Box::new(DenseMatrixMutView::new(self, rows, cols).unwrap()) } + // private function so for now assume infallible fn fill(nrows: usize, ncols: usize, value: T) -> Self { - DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true) + DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true).unwrap() } + // private function so for now assume infallible fn from_iterator>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self { - DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0) + DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0).unwrap() } fn transpose(&self) -> Self { @@ -428,12 +493,12 @@ impl EVDDecomposable for DenseMatrix {} impl LUDecomposable for DenseMatrix {} impl SVDDecomposable for DenseMatrix {} -impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixView<'a, T> { +impl Array for DenseMatrixView<'_, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { - &self.values[(pos.0 + pos.1 * self.stride)] + &self.values[pos.0 + pos.1 * self.stride] } else { - &self.values[(pos.0 * self.stride + pos.1)] + &self.values[pos.0 * self.stride + pos.1] } } @@ -450,7 +515,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMa } } -impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixView<'a, T> { +impl Array for DenseMatrixView<'_, T> { fn get(&self, i: usize) -> &T { if self.nrows == 1 { if self.column_major { @@ -488,16 +553,16 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixView< } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for DenseMatrixView<'a, T> {} +impl ArrayView2 for DenseMatrixView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for DenseMatrixView<'a, T> {} +impl ArrayView1 for DenseMatrixView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixMutView<'a, T> { +impl Array for DenseMatrixMutView<'_, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { - &self.values[(pos.0 + pos.1 * self.stride)] + &self.values[pos.0 + pos.1 * self.stride] } else { - &self.values[(pos.0 * self.stride + pos.1)] + &self.values[pos.0 * self.stride + pos.1] } } @@ -514,14 +579,12 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMa } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray - for DenseMatrixMutView<'a, T> -{ +impl MutArray for DenseMatrixMutView<'_, T> { fn set(&mut self, pos: (usize, usize), x: T) { if self.column_major { - self.values[(pos.0 + pos.1 * self.stride)] = x; + self.values[pos.0 + pos.1 * self.stride] = x; } else { - self.values[(pos.0 * self.stride + pos.1)] = x; + self.values[pos.0 * self.stride + pos.1] = x; } } @@ -530,29 +593,89 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray } } -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2 for DenseMatrixMutView<'a, T> {} +impl MutArrayView2 for DenseMatrixMutView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for DenseMatrixMutView<'a, T> {} +impl ArrayView2 for DenseMatrixMutView<'_, T> {} impl MatrixStats for DenseMatrix {} impl MatrixPreprocessing for DenseMatrix {} #[cfg(test)] +#[warn(clippy::reversed_empty_ranges)] mod tests { use super::*; use approx::relative_eq; #[test] - fn test_display() { + fn test_instantiate_from_2d() { let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], 
&[7., 8., 9.]]); + assert!(x.is_ok()); + } + #[test] + fn test_instantiate_from_2d_empty() { + let input: &[&[f64]] = &[&[]]; + let x = DenseMatrix::from_2d_array(input); + assert!(x.is_err()); + } + #[test] + fn test_instantiate_from_2d_empty2() { + let input: &[&[f64]] = &[&[], &[]]; + let x = DenseMatrix::from_2d_array(input); + assert!(x.is_err()); + } + #[test] + fn test_instantiate_ok_view1() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..2, 0..2); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_ok_view2() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..3, 0..3); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_ok_view3() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 2..3, 0..3); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_ok_view4() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 3..3, 0..3); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_err_view1() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 3..4, 0..3); + assert!(v.is_err()); + } + #[test] + fn test_instantiate_err_view2() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..3, 3..4); + assert!(v.is_err()); + } + #[test] + fn test_instantiate_err_view3() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..3, 4..3); + assert!(v.is_err()); + } + #[test] + fn test_display() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); println!("{}", &x); } #[test] fn test_get_row_col() { - let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); assert_eq!(15.0, x.get_col(1).sum()); assert_eq!(15.0, x.get_row(1).sum()); @@ -561,7 +684,7 @@ mod tests { #[test] fn test_row_major() { - let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false); + let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false).unwrap(); assert_eq!(5, *x.get_col(1).get(1)); assert_eq!(7, x.get_col(1).sum()); @@ -575,7 +698,8 @@ mod tests { #[test] fn test_get_slice() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]) + .unwrap(); assert_eq!( vec![4, 5, 6], @@ -589,7 +713,7 @@ mod tests { #[test] fn test_iter_mut() { - let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); + let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap(); assert_eq!(vec![1, 4, 7, 2, 5, 8, 3, 6, 9], x.values); // add +2 to some elements @@ -625,7 +749,8 @@ mod tests { #[test] fn test_str_array() { let mut x = - DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]]); + DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]]) + .unwrap(); assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values); x.iterator_mut(0).for_each(|v| *v = 
"str"); @@ -637,7 +762,7 @@ mod tests { #[test] fn test_transpose() { - let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]); + let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]).unwrap(); assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values); assert!(x.column_major); @@ -650,7 +775,7 @@ mod tests { #[test] fn test_from_iterator() { - let data = vec![1, 2, 3, 4, 5, 6]; + let data = [1, 2, 3, 4, 5, 6]; let m = DenseMatrix::from_iterator(data.iter(), 2, 3, 0); @@ -664,8 +789,8 @@ mod tests { #[test] fn test_take() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); println!("{a}"); // take column 0 and 2 @@ -677,7 +802,7 @@ mod tests { #[test] fn test_mut() { - let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]); + let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]).unwrap(); let a = a.abs(); assert_eq!(vec![1.3, 4.0, 2.1, 5.3, 3.4, 6.1], a.values); @@ -688,7 +813,8 @@ mod tests { #[test] fn test_reshape() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]) + .unwrap(); let a = a.reshape(2, 6, 0); assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values); @@ -701,13 +827,15 @@ mod tests { #[test] fn test_eq() { - let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]); - let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); + let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); let c = DenseMatrix::from_2d_array(&[ &[1. + f32::EPSILON, 2., 3.], &[4., 5., 6. + f32::EPSILON], - ]); - let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]]); + ]) + .unwrap(); + let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. 
+ f32::EPSILON]]) + .unwrap(); assert!(!relative_eq!(a, b)); assert!(!relative_eq!(a, d)); diff --git a/src/linalg/basic/vector.rs b/src/linalg/basic/vector.rs index 99da9819..d2e0bae6 100644 --- a/src/linalg/basic/vector.rs +++ b/src/linalg/basic/vector.rs @@ -15,6 +15,25 @@ pub struct VecView<'a, T: Debug + Display + Copy + Sized> { ptr: &'a [T], } +impl Array for &[T] { + fn get(&self, i: usize) -> &T { + &self[i] + } + + fn shape(&self) -> usize { + self.len() + } + + fn is_empty(&self) -> bool { + self.len() > 0 + } + + fn iterator<'b>(&'b self, axis: u8) -> Box + 'b> { + assert!(axis == 0, "For one dimensional array `axis` should == 0"); + Box::new(self.iter()) + } +} + impl Array for Vec { fn get(&self, i: usize) -> &T { &self[i] @@ -36,6 +55,7 @@ impl Array for Vec { impl MutArray for Vec { fn set(&mut self, i: usize, x: T) { + // NOTE: this panics in case of out of bounds index self[i] = x } @@ -46,6 +66,7 @@ impl MutArray for Vec { } impl ArrayView1 for Vec {} +impl ArrayView1 for &[T] {} impl MutArrayView1 for Vec {} @@ -98,7 +119,7 @@ impl Array1 for Vec { } } -impl<'a, T: Debug + Display + Copy + Sized> Array for VecMutView<'a, T> { +impl Array for VecMutView<'_, T> { fn get(&self, i: usize) -> &T { &self.ptr[i] } @@ -117,7 +138,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for VecMutView<'a, T } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray for VecMutView<'a, T> { +impl MutArray for VecMutView<'_, T> { fn set(&mut self, i: usize, x: T) { self.ptr[i] = x; } @@ -128,10 +149,10 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray for VecMutView<'a } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for VecMutView<'a, T> {} -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1 for VecMutView<'a, T> {} +impl ArrayView1 for VecMutView<'_, T> {} +impl MutArrayView1 for VecMutView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> Array for VecView<'a, T> { +impl Array for VecView<'_, T> { fn get(&self, i: usize) -> &T { &self.ptr[i] } @@ -150,7 +171,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for VecView<'a, T> { } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for VecView<'a, T> {} +impl ArrayView1 for VecView<'_, T> {} #[cfg(test)] mod tests { @@ -191,7 +212,7 @@ mod tests { #[test] fn test_len() { - let x = vec![1, 2, 3]; + let x = [1, 2, 3]; assert_eq!(3, x.len()); } diff --git a/src/linalg/ndarray/matrix.rs b/src/linalg/ndarray/matrix.rs index adc8d7e8..5040497a 100644 --- a/src/linalg/ndarray/matrix.rs +++ b/src/linalg/ndarray/matrix.rs @@ -68,7 +68,7 @@ impl ArrayView2 for ArrayBase impl MutArrayView2 for ArrayBase, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayView<'a, T, Ix2> { +impl BaseArray for ArrayView<'_, T, Ix2> { fn get(&self, pos: (usize, usize)) -> &T { &self[[pos.0, pos.1]] } @@ -144,11 +144,9 @@ impl EVDDecomposable for ArrayBase, Ix2> impl LUDecomposable for ArrayBase, Ix2> {} impl SVDDecomposable for ArrayBase, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for ArrayView<'a, T, Ix2> {} +impl ArrayView2 for ArrayView<'_, T, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray - for ArrayViewMut<'a, T, Ix2> -{ +impl BaseArray for ArrayViewMut<'_, T, Ix2> { fn get(&self, pos: (usize, usize)) -> &T { &self[[pos.0, pos.1]] } @@ -175,9 +173,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray - for ArrayViewMut<'a, T, Ix2> -{ +impl MutArray for ArrayViewMut<'_, T, Ix2> { fn set(&mut self, 
pos: (usize, usize), x: T) { self[[pos.0, pos.1]] = x } @@ -195,9 +191,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray } } -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2 for ArrayViewMut<'a, T, Ix2> {} +impl MutArrayView2 for ArrayViewMut<'_, T, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for ArrayViewMut<'a, T, Ix2> {} +impl ArrayView2 for ArrayViewMut<'_, T, Ix2> {} #[cfg(test)] mod tests { diff --git a/src/linalg/ndarray/vector.rs b/src/linalg/ndarray/vector.rs index 7105da89..de3f7d93 100644 --- a/src/linalg/ndarray/vector.rs +++ b/src/linalg/ndarray/vector.rs @@ -41,7 +41,7 @@ impl ArrayView1 for ArrayBase impl MutArrayView1 for ArrayBase, Ix1> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayView<'a, T, Ix1> { +impl BaseArray for ArrayView<'_, T, Ix1> { fn get(&self, i: usize) -> &T { &self[i] } @@ -60,9 +60,9 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayView<'a } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for ArrayView<'a, T, Ix1> {} +impl ArrayView1 for ArrayView<'_, T, Ix1> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayViewMut<'a, T, Ix1> { +impl BaseArray for ArrayViewMut<'_, T, Ix1> { fn get(&self, i: usize) -> &T { &self[i] } @@ -81,7 +81,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayViewMut } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray for ArrayViewMut<'a, T, Ix1> { +impl MutArray for ArrayViewMut<'_, T, Ix1> { fn set(&mut self, i: usize, x: T) { self[i] = x; } @@ -92,8 +92,8 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray for ArrayViewMut< } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for ArrayViewMut<'a, T, Ix1> {} -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1 for ArrayViewMut<'a, T, Ix1> {} +impl ArrayView1 for ArrayViewMut<'_, T, Ix1> {} +impl MutArrayView1 for ArrayViewMut<'_, T, Ix1> {} impl Array1 for ArrayBase, Ix1> { fn slice<'a>(&'a self, range: Range) -> Box + 'a> { diff --git a/src/linalg/traits/cholesky.rs b/src/linalg/traits/cholesky.rs index 1394270f..baec8f87 100644 --- a/src/linalg/traits/cholesky.rs +++ b/src/linalg/traits/cholesky.rs @@ -15,7 +15,7 @@ //! &[25., 15., -5.], //! &[15., 18., 0.], //! &[-5., 0., 11.] -//! ]); +//! ]).unwrap(); //! //! let cholesky = A.cholesky().unwrap(); //! 
let lower_triangular: DenseMatrix = cholesky.L(); @@ -175,11 +175,14 @@ mod tests { )] #[test] fn cholesky_decompose() { - let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); + let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]) + .unwrap(); let l = - DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]]); + DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]]) + .unwrap(); let u = - DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]]); + DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]]) + .unwrap(); let cholesky = a.cholesky().unwrap(); assert!(relative_eq!(cholesky.L().abs(), l.abs(), epsilon = 1e-4)); @@ -197,9 +200,10 @@ mod tests { )] #[test] fn cholesky_solve_mut() { - let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); - let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]); - let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]); + let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]) + .unwrap(); + let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]).unwrap(); + let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]).unwrap(); let cholesky = a.cholesky().unwrap(); diff --git a/src/linalg/traits/evd.rs b/src/linalg/traits/evd.rs index ccbdded6..3bb382a0 100644 --- a/src/linalg/traits/evd.rs +++ b/src/linalg/traits/evd.rs @@ -19,7 +19,7 @@ //! &[0.9000, 0.4000, 0.7000], //! &[0.4000, 0.5000, 0.3000], //! &[0.7000, 0.3000, 0.8000], -//! ]); +//! ]).unwrap(); //! //! let evd = A.evd(true).unwrap(); //! let eigenvectors: DenseMatrix = evd.V; @@ -820,7 +820,8 @@ mod tests { &[0.9000, 0.4000, 0.7000], &[0.4000, 0.5000, 0.3000], &[0.7000, 0.3000, 0.8000], - ]); + ]) + .unwrap(); let eigen_values: Vec = vec![1.7498382, 0.3165784, 0.1335834]; @@ -828,7 +829,8 @@ mod tests { &[0.6881997, -0.07121225, 0.7220180], &[0.3700456, 0.89044952, -0.2648886], &[0.6240573, -0.44947578, -0.6391588], - ]); + ]) + .unwrap(); let evd = A.evd(true).unwrap(); @@ -839,7 +841,7 @@ mod tests { )); for (i, eigen_values_i) in eigen_values.iter().enumerate() { assert!((eigen_values_i - evd.d[i]).abs() < 1e-4); - assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON); + assert!((0f64 - evd.e[i]).abs() < f64::EPSILON); } } #[cfg_attr( @@ -852,7 +854,8 @@ mod tests { &[0.9000, 0.4000, 0.7000], &[0.4000, 0.5000, 0.3000], &[0.8000, 0.3000, 0.8000], - ]); + ]) + .unwrap(); let eigen_values: Vec = vec![1.79171122, 0.31908143, 0.08920735]; @@ -860,7 +863,8 @@ mod tests { &[0.7178958, 0.05322098, 0.6812010], &[0.3837711, -0.84702111, -0.1494582], &[0.6952105, 0.43984484, -0.7036135], - ]); + ]) + .unwrap(); let evd = A.evd(false).unwrap(); @@ -871,7 +875,7 @@ mod tests { )); for (i, eigen_values_i) in eigen_values.iter().enumerate() { assert!((eigen_values_i - evd.d[i]).abs() < 1e-4); - assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON); + assert!((0f64 - evd.e[i]).abs() < f64::EPSILON); } } #[cfg_attr( @@ -885,7 +889,8 @@ mod tests { &[4.0, -1.0, 1.0, 1.0], &[1.0, 1.0, 3.0, -2.0], &[1.0, 1.0, 4.0, -1.0], - ]); + ]) + .unwrap(); let eigen_values_d: Vec = vec![0.0, 2.0, 2.0, 0.0]; let eigen_values_e: Vec = vec![2.2361, 0.9999, -0.9999, -2.2361]; @@ -895,7 +900,8 @@ mod tests { &[-0.6707, 0.1059, 0.901, 0.6289], &[0.9159, -0.1378, 0.3816, 0.0806], &[0.6707, 0.1059, 0.901, -0.6289], - ]); + ]) + .unwrap(); let evd = 
A.evd(false).unwrap(); diff --git a/src/linalg/traits/high_order.rs b/src/linalg/traits/high_order.rs index f1f86672..d3466e20 100644 --- a/src/linalg/traits/high_order.rs +++ b/src/linalg/traits/high_order.rs @@ -12,9 +12,9 @@ pub trait HighOrderOperations: Array2 { /// use smartcore::linalg::traits::high_order::HighOrderOperations; /// use smartcore::linalg::basic::arrays::Array2; /// - /// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]); - /// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]); - /// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]); + /// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]).unwrap(); + /// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]).unwrap(); + /// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]).unwrap(); /// /// assert_eq!(a.ab(true, &b, false), expected); /// ``` diff --git a/src/linalg/traits/lu.rs b/src/linalg/traits/lu.rs index 1f0d5f47..7a1d0439 100644 --- a/src/linalg/traits/lu.rs +++ b/src/linalg/traits/lu.rs @@ -18,7 +18,7 @@ //! &[1., 2., 3.], //! &[0., 1., 5.], //! &[5., 6., 0.] -//! ]); +//! ]).unwrap(); //! //! let lu = A.lu().unwrap(); //! let lower: DenseMatrix = lu.L(); @@ -263,13 +263,13 @@ mod tests { )] #[test] fn decompose() { - let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]); + let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap(); let expected_L = - DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]); + DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]).unwrap(); let expected_U = - DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]); + DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]).unwrap(); let expected_pivot = - DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]); + DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]).unwrap(); let lu = a.lu().unwrap(); assert!(relative_eq!(lu.L(), expected_L, epsilon = 1e-4)); assert!(relative_eq!(lu.U(), expected_U, epsilon = 1e-4)); @@ -281,9 +281,10 @@ mod tests { )] #[test] fn inverse() { - let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]); + let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap(); let expected = - DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]]); + DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]]) + .unwrap(); let a_inv = a.lu().and_then(|lu| lu.inverse()).unwrap(); assert!(relative_eq!(a_inv, expected, epsilon = 1e-4)); } diff --git a/src/linalg/traits/qr.rs b/src/linalg/traits/qr.rs index eb452e13..2c70efcb 100644 --- a/src/linalg/traits/qr.rs +++ b/src/linalg/traits/qr.rs @@ -13,7 +13,7 @@ //! &[0.9, 0.4, 0.7], //! &[0.4, 0.5, 0.3], //! &[0.7, 0.3, 0.8] -//! ]); +//! ]).unwrap(); //! //! let qr = A.qr().unwrap(); //! 
let orthogonal: DenseMatrix = qr.Q(); @@ -201,17 +201,20 @@ mod tests { )] #[test] fn decompose() { - let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); + let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]) + .unwrap(); let q = DenseMatrix::from_2d_array(&[ &[-0.7448, 0.2436, 0.6212], &[-0.331, -0.9432, -0.027], &[-0.5793, 0.2257, -0.7832], - ]); + ]) + .unwrap(); let r = DenseMatrix::from_2d_array(&[ &[-1.2083, -0.6373, -1.0842], &[0.0, -0.3064, 0.0682], &[0.0, 0.0, -0.1999], - ]); + ]) + .unwrap(); let qr = a.qr().unwrap(); assert!(relative_eq!(qr.Q().abs(), q.abs(), epsilon = 1e-4)); assert!(relative_eq!(qr.R().abs(), r.abs(), epsilon = 1e-4)); @@ -223,13 +226,15 @@ mod tests { )] #[test] fn qr_solve_mut() { - let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); - let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]); + let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]) + .unwrap(); + let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap(); let expected_w = DenseMatrix::from_2d_array(&[ &[-0.2027027, -1.2837838], &[0.8783784, 2.2297297], &[0.4729730, 0.6621622], - ]); + ]) + .unwrap(); let w = a.qr_solve_mut(b).unwrap(); assert!(relative_eq!(w, expected_w, epsilon = 1e-2)); } diff --git a/src/linalg/traits/stats.rs b/src/linalg/traits/stats.rs index 052da476..6c3db820 100644 --- a/src/linalg/traits/stats.rs +++ b/src/linalg/traits/stats.rs @@ -136,13 +136,12 @@ pub trait MatrixPreprocessing: MutArrayView2 + Clone { /// ```rust /// use smartcore::linalg::basic::matrix::DenseMatrix; /// use smartcore::linalg::traits::stats::MatrixPreprocessing; - /// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]); - /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]); + /// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap(); + /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap(); /// a.binarize_mut(0.); /// /// assert_eq!(a, expected); /// ``` - fn binarize_mut(&mut self, threshold: T) { let (nrows, ncols) = self.shape(); for row in 0..nrows { @@ -159,8 +158,8 @@ pub trait MatrixPreprocessing: MutArrayView2 + Clone { /// ```rust /// use smartcore::linalg::basic::matrix::DenseMatrix; /// use smartcore::linalg::traits::stats::MatrixPreprocessing; - /// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]); - /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]); + /// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap(); + /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap(); /// /// assert_eq!(a.binarize(0.), expected); /// ``` @@ -186,7 +185,8 @@ mod tests { &[1., 2., 3., 1., 2.], &[4., 5., 6., 3., 4.], &[7., 8., 9., 5., 6.], - ]); + ]) + .unwrap(); let expected_0 = vec![4., 5., 6., 3., 4.]; let expected_1 = vec![1.8, 4.4, 7.]; @@ -196,7 +196,7 @@ mod tests { #[test] fn test_var() { - let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]); + let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap(); let expected_0 = vec![4., 4., 4., 4.]; let expected_1 = vec![1.25, 1.25]; @@ -211,12 +211,13 @@ mod tests { let m = DenseMatrix::from_2d_array(&[ &[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25], &[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 
2.75, 3.25], - ]); + ]) + .unwrap(); let expected_0 = vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let expected_1 = vec![1.25, 1.25]; - assert!(m.var(0).approximate_eq(&expected_0, std::f64::EPSILON)); - assert!(m.var(1).approximate_eq(&expected_1, std::f64::EPSILON)); + assert!(m.var(0).approximate_eq(&expected_0, f64::EPSILON)); + assert!(m.var(1).approximate_eq(&expected_1, f64::EPSILON)); assert_eq!( m.mean(0), vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25] @@ -230,7 +231,8 @@ mod tests { &[1., 2., 3., 1., 2.], &[4., 5., 6., 3., 4.], &[7., 8., 9., 5., 6.], - ]); + ]) + .unwrap(); let expected_0 = vec![ 2.449489742783178, 2.449489742783178, @@ -251,10 +253,10 @@ mod tests { #[test] fn test_scale() { let m: DenseMatrix = - DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]); + DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap(); let expected_0: DenseMatrix = - DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]); + DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]).unwrap(); let expected_1: DenseMatrix = DenseMatrix::from_2d_array(&[ &[ -1.3416407864998738, @@ -268,7 +270,8 @@ mod tests { 0.4472135954999579, 1.3416407864998738, ], - ]); + ]) + .unwrap(); assert_eq!(m.mean(0), vec![3.0, 4.0, 5.0, 6.0]); assert_eq!(m.mean(1), vec![2.5, 6.5]); diff --git a/src/linalg/traits/svd.rs b/src/linalg/traits/svd.rs index 8608942d..cee33a0e 100644 --- a/src/linalg/traits/svd.rs +++ b/src/linalg/traits/svd.rs @@ -17,7 +17,7 @@ //! &[0.9, 0.4, 0.7], //! &[0.4, 0.5, 0.3], //! &[0.7, 0.3, 0.8] -//! ]); +//! ]).unwrap(); //! //! let svd = A.svd().unwrap(); //! let u: DenseMatrix = svd.U; @@ -48,11 +48,9 @@ pub struct SVD> { pub V: M, /// Singular values of the original matrix pub s: Vec, - /// m: usize, - /// n: usize, - /// + /// Tolerance tol: T, } @@ -489,7 +487,8 @@ mod tests { &[0.9000, 0.4000, 0.7000], &[0.4000, 0.5000, 0.3000], &[0.7000, 0.3000, 0.8000], - ]); + ]) + .unwrap(); let s: Vec = vec![1.7498382, 0.3165784, 0.1335834]; @@ -497,13 +496,15 @@ mod tests { &[0.6881997, -0.07121225, 0.7220180], &[0.3700456, 0.89044952, -0.2648886], &[0.6240573, -0.44947578, -0.639158], - ]); + ]) + .unwrap(); let V = DenseMatrix::from_2d_array(&[ &[0.6881997, -0.07121225, 0.7220180], &[0.3700456, 0.89044952, -0.2648886], &[0.6240573, -0.44947578, -0.6391588], - ]); + ]) + .unwrap(); let svd = A.svd().unwrap(); @@ -577,7 +578,8 @@ mod tests { -0.2158704, -0.27529472, ], - ]); + ]) + .unwrap(); let s: Vec = vec![ 3.8589375, 3.4396766, 2.6487176, 2.2317399, 1.5165054, 0.8109055, 0.2706515, @@ -647,7 +649,8 @@ mod tests { 0.73034065, -0.43965505, ], - ]); + ]) + .unwrap(); let V = DenseMatrix::from_2d_array(&[ &[ @@ -707,7 +710,8 @@ mod tests { 0.1654796, -0.32346758, ], - ]); + ]) + .unwrap(); let svd = A.svd().unwrap(); @@ -723,10 +727,11 @@ mod tests { )] #[test] fn solve() { - let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); - let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]); + let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]) + .unwrap(); + let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap(); let expected_w = - DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]); + DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]).unwrap(); let w = a.svd_solve_mut(b).unwrap(); assert!(relative_eq!(w, expected_w, epsilon = 1e-2)); } @@ -737,7 
+742,8 @@ mod tests { )] #[test] fn decompose_restore() { - let a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]); + let a = + DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]).unwrap(); let svd = a.svd().unwrap(); let u: &DenseMatrix = &svd.U; //U let v: &DenseMatrix = &svd.V; // V diff --git a/src/linear/bg_solver.rs b/src/linear/bg_solver.rs index d1ad29f2..2c466b13 100644 --- a/src/linear/bg_solver.rs +++ b/src/linear/bg_solver.rs @@ -12,7 +12,8 @@ //! pub struct BGSolver {} //! impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> for BGSolver {} //! -//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); +//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., +//! 11.]]).unwrap(); //! let b = vec![40., 51., 28.]; //! let expected = vec![1.0, 2.0, 3.0]; //! let mut x = Vec::zeros(3); @@ -26,9 +27,9 @@ use crate::error::Failed; use crate::linalg::basic::arrays::{Array, Array1, Array2, ArrayView1, MutArrayView1}; use crate::numbers::floatnum::FloatNumber; -/// +/// Trait for Biconjugate Gradient Solver pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2> { - /// + /// Solve Ax = b fn solve_mut( &self, a: &'a X, @@ -108,7 +109,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2> { Ok(err) } - /// + /// solve preconditioner fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) { let diag = Self::diag(a); let n = diag.len(); @@ -132,7 +133,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2> { y.copy_from(&x.xa(true, a)); } - /// + /// Extract the diagonal from a matrix fn diag(a: &X) -> Vec { let (nrows, ncols) = a.shape(); let n = nrows.min(ncols); @@ -158,9 +159,10 @@ mod tests { #[test] fn bg_solver() { - let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); + let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]) + .unwrap(); let b = vec![40., 51., 28.]; - let expected = vec![1.0, 2.0, 3.0]; + let expected = [1.0, 2.0, 3.0]; let mut x = Vec::zeros(3); diff --git a/src/linear/elastic_net.rs b/src/linear/elastic_net.rs index 87deddca..643ab14e 100644 --- a/src/linear/elastic_net.rs +++ b/src/linear/elastic_net.rs @@ -38,7 +38,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 
100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -511,7 +511,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, @@ -562,7 +563,8 @@ mod tests { &[17.0, 1918.0, 1.4054969025700674], &[18.0, 1929.0, 1.3271699396384906], &[19.0, 1915.0, 1.1373332337674806], - ]); + ]) + .unwrap(); let y: Vec = vec![ 1.48, 2.72, 4.52, 5.72, 5.25, 4.07, 3.75, 4.75, 6.77, 4.72, 6.78, 6.79, 8.3, 7.42, @@ -627,7 +629,7 @@ mod tests { // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - // ]); + // ]).unwrap(); // let y = vec![ // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/linear/lasso.rs b/src/linear/lasso.rs index 8de391fc..2919b025 100644 --- a/src/linear/lasso.rs +++ b/src/linear/lasso.rs @@ -418,7 +418,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/linear/lasso_optimizer.rs b/src/linear/lasso_optimizer.rs index 3f18c030..22119160 100644 --- a/src/linear/lasso_optimizer.rs +++ b/src/linear/lasso_optimizer.rs @@ -16,7 +16,7 @@ use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1, MutArray, MutArra use crate::linear::bg_solver::BiconjugateGradientSolver; use crate::numbers::floatnum::FloatNumber; -/// +/// Interior Point Optimizer pub struct InteriorPointOptimizer> { ata: X, d1: Vec, @@ -25,9 +25,8 @@ pub struct InteriorPointOptimizer> { prs: Vec, } -/// impl> InteriorPointOptimizer { - /// + /// Initialize a new Interior Point Optimizer pub fn new(a: &X, n: usize) -> InteriorPointOptimizer { InteriorPointOptimizer { ata: a.ab(true, a, false), @@ -38,7 +37,7 @@ impl> InteriorPointOptimizer { } } - /// + /// Run the optimization pub fn optimize( &mut self, x: &X, @@ -101,7 +100,7 @@ impl> InteriorPointOptimizer { // CALCULATE DUALITY GAP let xnu = nu.xa(false, x); - let max_xnu = xnu.norm(std::f64::INFINITY); + let max_xnu = xnu.norm(f64::INFINITY); if max_xnu > lambda_f64 { let lnu = T::from_f64(lambda_f64 / max_xnu).unwrap(); nu.mul_scalar_mut(lnu); @@ -208,7 +207,6 @@ impl> InteriorPointOptimizer { Ok(w) } - /// fn sumlogneg(f: &X) -> T { let (n, _) = f.shape(); let mut sum = T::zero(); @@ -220,11 +218,9 @@ impl> InteriorPointOptimizer { } } -/// impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> for InteriorPointOptimizer { - /// fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) { let (_, p) = a.shape(); @@ -234,7 +230,6 @@ impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> } } - /// fn mat_vec_mul(&self, _: &X, x: &Vec, y: &mut Vec) { let (_, p) = self.ata.shape(); let x_slice = Vec::from_slice(x.slice(0..p).as_ref()); @@ -246,7 +241,6 @@ impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> } } - /// fn mat_t_vec_mul(&self, a: &X, x: &Vec, y: &mut Vec) { self.mat_vec_mul(a, x, y); } diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index a5c76999..43410bbb 100644 --- a/src/linear/linear_regression.rs +++ 
b/src/linear/linear_regression.rs @@ -40,7 +40,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -341,7 +341,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, @@ -393,7 +394,7 @@ mod tests { // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - // ]); + // ]).unwrap(); // let y = vec![ // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 4a4041bc..c28dc347 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -35,7 +35,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! ]; @@ -183,14 +183,11 @@ pub struct LogisticRegression< } trait ObjectiveFunction> { - /// fn f(&self, w_bias: &[T]) -> T; - /// #[allow(clippy::ptr_arg)] fn df(&self, g: &mut Vec, w_bias: &Vec); - /// #[allow(clippy::ptr_arg)] fn partial_dot(w: &[T], x: &X, v_col: usize, m_row: usize) -> T { let mut sum = T::zero(); @@ -261,8 +258,8 @@ impl, Y: } } -impl<'a, T: Number + FloatNumber, X: Array2> ObjectiveFunction - for BinaryObjectiveFunction<'a, T, X> +impl> ObjectiveFunction + for BinaryObjectiveFunction<'_, T, X> { fn f(&self, w_bias: &[T]) -> T { let mut f = T::zero(); @@ -316,8 +313,8 @@ struct MultiClassObjectiveFunction<'a, T: Number + FloatNumber, X: Array2> { _phantom_t: PhantomData, } -impl<'a, T: Number + FloatNumber + RealNumber, X: Array2> ObjectiveFunction - for MultiClassObjectiveFunction<'a, T, X> +impl> ObjectiveFunction + for MultiClassObjectiveFunction<'_, T, X> { fn f(&self, w_bias: &[T]) -> T { let mut f = T::zero(); @@ -416,7 +413,7 @@ impl, Y: /// Fits Logistic Regression to your data. /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation. /// * `y` - target class values - /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values. + /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values. 
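    ///
    /// A minimal sketch of the call pattern, reusing the first ten rows of the binary test data in this module (convergence on such a small sample is illustrative, not guaranteed):
    /// ```rust
    /// use smartcore::linalg::basic::matrix::DenseMatrix;
    /// use smartcore::linear::logistic_regression::LogisticRegression;
    ///
    /// let x = DenseMatrix::from_2d_array(&[
    ///     &[1., -5.], &[2., 5.], &[3., -2.], &[1., 2.], &[2., 0.],
    ///     &[6., -5.], &[7., 5.], &[6., -2.], &[7., 2.], &[6., 0.],
    /// ]).unwrap();
    /// let y: Vec<i32> = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1];
    ///
    /// let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
    /// let y_hat = lr.predict(&x).unwrap();
    /// assert_eq!(y_hat.len(), y.len());
    /// ```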
pub fn fit( x: &X, y: &Y, @@ -611,7 +608,8 @@ mod tests { &[10., -2.], &[8., 2.], &[9., 0.], - ]); + ]) + .unwrap(); let y = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; @@ -628,11 +626,11 @@ mod tests { objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]); objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]); - assert!((g[0] + 33.000068218163484).abs() < std::f64::EPSILON); + assert!((g[0] + 33.000068218163484).abs() < f64::EPSILON); let f = objective.f(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]); - assert!((f - 408.0052230582765).abs() < std::f64::EPSILON); + assert!((f - 408.0052230582765).abs() < f64::EPSILON); let objective_reg = MultiClassObjectiveFunction { x: &x, @@ -671,7 +669,8 @@ mod tests { &[10., -2.], &[8., 2.], &[9., 0.], - ]); + ]) + .unwrap(); let y = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1]; @@ -687,13 +686,13 @@ mod tests { objective.df(&mut g, &vec![1., 2., 3.]); objective.df(&mut g, &vec![1., 2., 3.]); - assert!((g[0] - 26.051064349381285).abs() < std::f64::EPSILON); - assert!((g[1] - 10.239000702928523).abs() < std::f64::EPSILON); - assert!((g[2] - 3.869294270156324).abs() < std::f64::EPSILON); + assert!((g[0] - 26.051064349381285).abs() < f64::EPSILON); + assert!((g[1] - 10.239000702928523).abs() < f64::EPSILON); + assert!((g[2] - 3.869294270156324).abs() < f64::EPSILON); let f = objective.f(&[1., 2., 3.]); - assert!((f - 59.76994756647412).abs() < std::f64::EPSILON); + assert!((f - 59.76994756647412).abs() < f64::EPSILON); let objective_reg = BinaryObjectiveFunction { x: &x, @@ -733,7 +732,8 @@ mod tests { &[10., -2.], &[8., 2.], &[9., 0.], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); @@ -818,37 +818,41 @@ mod tests { assert!(reg_coeff_sum < coeff); } - // TODO: serialization for the new DenseMatrix needs to be implemented - // #[cfg_attr(all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test)] - // #[test] - // #[cfg(feature = "serde")] - // fn serde() { - // let x = DenseMatrix::from_2d_array(&[ - // &[1., -5.], - // &[2., 5.], - // &[3., -2.], - // &[1., 2.], - // &[2., 0.], - // &[6., -5.], - // &[7., 5.], - // &[6., -2.], - // &[7., 2.], - // &[6., 0.], - // &[8., -5.], - // &[9., 5.], - // &[10., -2.], - // &[8., 2.], - // &[9., 0.], - // ]); - // let y: Vec = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; - - // let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); - - // let deserialized_lr: LogisticRegression, Vec> = - // serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap(); - - // assert_eq!(lr, deserialized_lr); - // } + //TODO: serialization for the new DenseMatrix needs to be implemented + #[cfg_attr( + all(target_arch = "wasm32", not(target_os = "wasi")), + wasm_bindgen_test::wasm_bindgen_test + )] + #[test] + #[cfg(feature = "serde")] + fn serde() { + let x: DenseMatrix = DenseMatrix::from_2d_array(&[ + &[1., -5.], + &[2., 5.], + &[3., -2.], + &[1., 2.], + &[2., 0.], + &[6., -5.], + &[7., 5.], + &[6., -2.], + &[7., 2.], + &[6., 0.], + &[8., -5.], + &[9., 5.], + &[10., -2.], + &[8., 2.], + &[9., 0.], + ]) + .unwrap(); + let y: Vec = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; + + let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); + + let deserialized_lr: LogisticRegression, Vec> = + serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap(); + + assert_eq!(lr, deserialized_lr); + } #[cfg_attr( 
all(target_arch = "wasm32", not(target_os = "wasi")), @@ -877,7 +881,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); @@ -890,11 +895,7 @@ mod tests { let y_hat = lr.predict(&x).unwrap(); - let error: i32 = y - .into_iter() - .zip(y_hat.into_iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); + let error: i32 = y.into_iter().zip(y_hat).map(|(a, b)| (a - b).abs()).sum(); assert!(error <= 1); @@ -903,4 +904,46 @@ mod tests { assert!(reg_coeff_sum < coeff); } + #[cfg_attr( + all(target_arch = "wasm32", not(target_os = "wasi")), + wasm_bindgen_test::wasm_bindgen_test + )] + #[test] + fn lr_fit_predict_random() { + let x: DenseMatrix = DenseMatrix::rand(52181, 94); + let y1: Vec = vec![1; 2181]; + let y2: Vec = vec![0; 50000]; + let y: Vec = y1.into_iter().chain(y2).collect(); + + let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); + let lr_reg = LogisticRegression::fit( + &x, + &y, + LogisticRegressionParameters::default().with_alpha(1.0), + ) + .unwrap(); + + let y_hat = lr.predict(&x).unwrap(); + let y_hat_reg = lr_reg.predict(&x).unwrap(); + + assert_eq!(y.len(), y_hat.len()); + assert_eq!(y.len(), y_hat_reg.len()); + } + + #[test] + fn test_logit() { + let x: &DenseMatrix = &DenseMatrix::rand(52181, 94); + let y1: Vec = vec![1; 2181]; + let y2: Vec = vec![0; 50000]; + let y: &Vec = &(y1.into_iter().chain(y2).collect()); + println!("y vec height: {:?}", y.len()); + println!("x matrix shape: {:?}", x.shape()); + + let lr = LogisticRegression::fit(x, y, Default::default()).unwrap(); + let y_hat = lr.predict(x).unwrap(); + + println!("y_hat shape: {:?}", y_hat.shape()); + + assert_eq!(y_hat.shape(), 52181); + } } diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index 2c354299..be2f3d41 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -40,7 +40,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -455,7 +455,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, @@ -513,7 +514,7 @@ mod tests { // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - // ]); + // ]).unwrap(); // let y = vec![ // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/metrics/distance/mahalanobis.rs b/src/metrics/distance/mahalanobis.rs index e526c20e..a9347a58 100644 --- a/src/metrics/distance/mahalanobis.rs +++ b/src/metrics/distance/mahalanobis.rs @@ -25,7 +25,7 @@ //! &[68., 590., 37.], //! &[69., 660., 46.], //! &[73., 600., 55.], -//! ]); +//! ]).unwrap(); //! //! let a = data.mean_by(0); //! 
let b = vec![66., 640., 44.]; @@ -151,7 +151,8 @@ mod tests { &[68., 590., 37.], &[69., 660., 46.], &[73., 600., 55.], - ]); + ]) + .unwrap(); let a = data.mean_by(0); let b = vec![66., 640., 44.]; diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index c7e1be3d..a7184293 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -37,7 +37,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! ]; diff --git a/src/model_selection/hyper_tuning/grid_search.rs b/src/model_selection/hyper_tuning/grid_search.rs index 3c914e48..74242c60 100644 --- a/src/model_selection/hyper_tuning/grid_search.rs +++ b/src/model_selection/hyper_tuning/grid_search.rs @@ -3,9 +3,9 @@ use crate::{ api::{Predictor, SupervisedEstimator}, error::{Failed, FailedError}, - linalg::basic::arrays::{Array2, Array1}, - numbers::realnum::RealNumber, + linalg::basic::arrays::{Array1, Array2}, numbers::basenum::Number, + numbers::realnum::RealNumber, }; use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult}; diff --git a/src/model_selection/kfold.rs b/src/model_selection/kfold.rs index 760881b7..d7ad22d2 100644 --- a/src/model_selection/kfold.rs +++ b/src/model_selection/kfold.rs @@ -283,9 +283,7 @@ mod tests { (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), ]; - for ((train, test), (expected_train, expected_test)) in - k.split(&x).into_iter().zip(expected) - { + for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) { assert_eq!(test, expected_test); assert_eq!(train, expected_train); } @@ -307,9 +305,7 @@ mod tests { (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), ]; - for ((train, test), (expected_train, expected_test)) in - k.split(&x).into_iter().zip(expected) - { + for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) { assert_eq!(test.len(), expected_test.len()); assert_eq!(train.len(), expected_train.len()); } diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index 27571fdb..e72787b7 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -36,7 +36,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., //! ]; @@ -84,7 +84,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! 
]; @@ -396,7 +396,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let cv = KFold { @@ -441,7 +442,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -489,7 +491,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -539,7 +542,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let cv = KFold::default().with_n_splits(3); diff --git a/src/naive_bayes/bernoulli.rs b/src/naive_bayes/bernoulli.rs index 52a3c52e..4be62d56 100644 --- a/src/naive_bayes/bernoulli.rs +++ b/src/naive_bayes/bernoulli.rs @@ -19,14 +19,14 @@ //! &[0, 1, 0, 0, 1, 0], //! &[0, 1, 0, 1, 0, 0], //! &[0, 1, 1, 0, 0, 1], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![0, 0, 0, 1]; //! //! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); //! //! // Testing data point is: //! // Chinese Chinese Chinese Tokyo Japan -//! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]); +//! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]).unwrap(); //! let y_hat = nb.predict(&x_test).unwrap(); //! ``` //! @@ -258,7 +258,7 @@ impl BernoulliNBDistribution { /// * `x` - training data. /// * `y` - vector with target values (classes) of length N. /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, - /// priors are adjusted according to the data. + /// priors are adjusted according to the data. /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter. /// * `binarize` - Threshold for binarizing. fn fit, Y: Array1>( @@ -402,10 +402,10 @@ impl, Y: Arr { /// Fits BernoulliNB with given data /// * `x` - training data of size NxM where N is the number of samples and M is the number of - /// features. + /// features. /// * `y` - vector with target values (classes) of length N. /// * `parameters` - additional parameters like class priors, alpha for smoothing and - /// binarizing threshold. + /// binarizing threshold. pub fn fit(x: &X, y: &Y, parameters: BernoulliNBParameters) -> Result { let distribution = if let Some(threshold) = parameters.binarize { BernoulliNBDistribution::fit( @@ -427,6 +427,7 @@ impl, Y: Arr /// Estimates the class labels for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with class estimates. 
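    ///
    /// A minimal sketch, mirroring the module-level example above:
    /// ```rust
    /// use smartcore::linalg::basic::matrix::DenseMatrix;
    /// use smartcore::naive_bayes::bernoulli::BernoulliNB;
    ///
    /// let x = DenseMatrix::from_2d_array(&[
    ///     &[1, 1, 0, 0, 0, 0],
    ///     &[0, 1, 0, 0, 1, 0],
    ///     &[0, 1, 0, 1, 0, 0],
    ///     &[0, 1, 1, 0, 0, 1],
    /// ]).unwrap();
    /// let y: Vec<u32> = vec![0, 0, 0, 1];
    /// let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap();
    ///
    /// // Testing data point is: Chinese Chinese Chinese Tokyo Japan
    /// let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]).unwrap();
    /// let y_hat = nb.predict(&x_test).unwrap();
    /// assert_eq!(y_hat, vec![1]);
    /// ```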
pub fn predict(&self, x: &X) -> Result { if let Some(threshold) = self.binarize { @@ -527,7 +528,8 @@ mod tests { &[0.0, 1.0, 0.0, 0.0, 1.0, 0.0], &[0.0, 1.0, 0.0, 1.0, 0.0, 0.0], &[0.0, 1.0, 1.0, 0.0, 0.0, 1.0], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 1]; let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); @@ -558,7 +560,7 @@ mod tests { // Testing data point is: // Chinese Chinese Chinese Tokyo Japan - let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]); + let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]).unwrap(); let y_hat = bnb.predict(&x_test).unwrap(); assert_eq!(y_hat, &[1]); @@ -586,7 +588,8 @@ mod tests { &[2, 0, 3, 3, 1, 2, 0, 2, 4, 1], &[2, 4, 0, 4, 2, 4, 1, 3, 1, 4], &[0, 2, 2, 3, 4, 0, 4, 4, 4, 4], - ]); + ]) + .unwrap(); let y: Vec = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2]; let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); @@ -643,7 +646,8 @@ mod tests { &[0, 1, 0, 0, 1, 0], &[0, 1, 0, 1, 0, 0], &[0, 1, 1, 0, 0, 1], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 1]; let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs index 2c0c7a4a..b60ee0d3 100644 --- a/src/naive_bayes/categorical.rs +++ b/src/naive_bayes/categorical.rs @@ -24,7 +24,7 @@ //! &[3, 4, 2, 4], //! &[0, 3, 1, 2], //! &[0, 4, 1, 2], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; //! //! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); @@ -95,7 +95,7 @@ impl PartialEq for CategoricalNBDistribution { return false; } for (a_i_j, b_i_j) in a_i.iter().zip(b_i.iter()) { - if (*a_i_j - *b_i_j).abs() > std::f64::EPSILON { + if (*a_i_j - *b_i_j).abs() > f64::EPSILON { return false; } } @@ -363,7 +363,7 @@ impl, Y: Array1> Predictor for Categ impl, Y: Array1> CategoricalNB { /// Fits CategoricalNB with given data /// * `x` - training data of size NxM where N is the number of samples and M is the number of - /// features. + /// features. /// * `y` - vector with target values (classes) of length N. /// * `parameters` - additional parameters like alpha for smoothing pub fn fit(x: &X, y: &Y, parameters: CategoricalNBParameters) -> Result { @@ -375,6 +375,7 @@ impl, Y: Array1> CategoricalNB { /// Estimates the class labels for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with class estimates. 
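    ///
    /// A minimal sketch of the call pattern (a small illustrative dataset, four samples with categorical features encoded as integers):
    /// ```rust
    /// use smartcore::linalg::basic::matrix::DenseMatrix;
    /// use smartcore::naive_bayes::categorical::CategoricalNB;
    ///
    /// let x = DenseMatrix::from_2d_array(&[
    ///     &[0, 2, 1, 0],
    ///     &[0, 2, 1, 1],
    ///     &[1, 2, 1, 0],
    ///     &[2, 1, 1, 0],
    /// ]).unwrap();
    /// let y: Vec<u32> = vec![0, 0, 1, 1];
    /// let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap();
    /// let y_hat = nb.predict(&x).unwrap();
    /// assert_eq!(y_hat.len(), 4);
    /// ```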
pub fn predict(&self, x: &X) -> Result { self.inner.as_ref().unwrap().predict(x) @@ -455,7 +456,8 @@ mod tests { &[1, 1, 1, 1], &[1, 2, 0, 0], &[2, 1, 1, 1], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); @@ -513,7 +515,7 @@ mod tests { ] ); - let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]); + let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]).unwrap(); let y_hat = cnb.predict(&x_test).unwrap(); assert_eq!(y_hat, vec![0, 1]); } @@ -539,7 +541,8 @@ mod tests { &[3, 4, 2, 4], &[0, 3, 1, 2], &[0, 4, 1, 2], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); @@ -571,7 +574,8 @@ mod tests { &[3, 4, 2, 4], &[0, 3, 1, 2], &[0, 4, 1, 2], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs index a62d3c91..e774fdc9 100644 --- a/src/naive_bayes/gaussian.rs +++ b/src/naive_bayes/gaussian.rs @@ -16,7 +16,7 @@ //! &[ 1., 1.], //! &[ 2., 1.], //! &[ 3., 2.], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![1, 1, 1, 2, 2, 2]; //! //! let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); @@ -175,7 +175,7 @@ impl GaussianNBDistribution { /// * `x` - training data. /// * `y` - vector with target values (classes) of length N. /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, - /// priors are adjusted according to the data. + /// priors are adjusted according to the data. pub fn fit, Y: Array1>( x: &X, y: &Y, @@ -317,7 +317,7 @@ impl, Y: Arr { /// Fits GaussianNB with given data /// * `x` - training data of size NxM where N is the number of samples and M is the number of - /// features. + /// features. /// * `y` - vector with target values (classes) of length N. /// * `parameters` - additional parameters like class priors. pub fn fit(x: &X, y: &Y, parameters: GaussianNBParameters) -> Result { @@ -328,6 +328,7 @@ impl, Y: Arr /// Estimates the class labels for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with class estimates. 
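    ///
    /// A minimal sketch, mirroring the module-level example above:
    /// ```rust
    /// use smartcore::linalg::basic::matrix::DenseMatrix;
    /// use smartcore::naive_bayes::gaussian::GaussianNB;
    ///
    /// let x = DenseMatrix::from_2d_array(&[
    ///     &[-1., -1.],
    ///     &[-2., -1.],
    ///     &[-3., -2.],
    ///     &[ 1.,  1.],
    ///     &[ 2.,  1.],
    ///     &[ 3.,  2.],
    /// ]).unwrap();
    /// let y: Vec<u32> = vec![1, 1, 1, 2, 2, 2];
    /// let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap();
    /// let y_hat = nb.predict(&x).unwrap();
    /// assert_eq!(y_hat, y);
    /// ```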
pub fn predict(&self, x: &X) -> Result { self.inner.as_ref().unwrap().predict(x) @@ -395,7 +396,8 @@ mod tests { &[1., 1.], &[2., 1.], &[3., 2.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 1, 2, 2, 2]; let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); @@ -422,23 +424,6 @@ mod tests { ); } - #[test] - fn run_gaussian_naive_bayes_with_few_samples() { - let x = - DenseMatrix::::from_2d_array(&[&[-1., -1.], &[-2., -1.], &[-3., -2.], &[1., 1.]]); - let y: Vec = vec![1, 1, 1, 2]; - - let gnb = GaussianNB::fit(&x, &y, Default::default()); - - match gnb.unwrap().predict(&x) { - Ok(_) => assert!(false, "test should return Failed"), - Err(err) => { - assert!(err.to_string() == "Can't find solution: log_likelihood for distribution of one of the rows is NaN"); - assert!(true) - } - } - } - #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test @@ -452,7 +437,8 @@ mod tests { &[1., 1.], &[2., 1.], &[3., 2.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 1, 2, 2, 2]; let priors = vec![0.3, 0.7]; @@ -479,7 +465,8 @@ mod tests { &[1., 1.], &[2., 1.], &[3., 2.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 1, 2, 2, 2]; let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index a476e8d2..c157de47 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -35,7 +35,7 @@ //! //! //! -use crate::error::{Failed, FailedError}; +use crate::error::Failed; use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1}; use crate::numbers::basenum::Number; #[cfg(feature = "serde")] @@ -89,45 +89,107 @@ impl, Y: Array1, D: NBDistribution Result { let y_classes = self.distribution.classes(); + if y_classes.is_empty() { + return Err(Failed::predict("Failed to predict, no classes available")); + } + let (rows, _) = x.shape(); - let mut log_likehood_is_nan = false; - let predictions = (0..rows) - .map(|row_index| { - let row = x.get_row(row_index); - let (prediction, _probability) = y_classes - .iter() - .enumerate() - .map(|(class_index, class)| { - let mut log_likelihood = - self.distribution.log_likelihood(class_index, &row); - if log_likelihood.is_nan() { - log_likelihood = 0f64; - log_likehood_is_nan = true; - } - ( - class, - log_likelihood + self.distribution.prior(class_index).ln(), - ) - }) - .max_by(|(_, p1), (_, p2)| p1.partial_cmp(p2).unwrap()) - .unwrap(); - *prediction - }) - .collect::>(); - if log_likehood_is_nan { - return Err(Failed::because( - FailedError::SolutionFailed, - "log_likelihood for distribution of one of the rows is NaN", - )); + let mut predictions = Vec::with_capacity(rows); + let mut all_probs_nan = true; + + for row_index in 0..rows { + let row = x.get_row(row_index); + let mut max_log_prob = f64::NEG_INFINITY; + let mut max_class = None; + + for (class_index, class) in y_classes.iter().enumerate() { + let log_likelihood = self.distribution.log_likelihood(class_index, &row); + let log_prob = log_likelihood + self.distribution.prior(class_index).ln(); + + if !log_prob.is_nan() && log_prob > max_log_prob { + max_log_prob = log_prob; + max_class = Some(*class); + all_probs_nan = false; + } + } + + predictions.push(max_class.unwrap_or(y_classes[0])); + } + + if all_probs_nan { + Err(Failed::predict( + "Failed to predict, all probabilities were NaN", + )) + } else { + Ok(Y::from_vec_slice(&predictions)) } - let y_hat = Y::from_vec_slice(&predictions); - Ok(y_hat) } } pub mod bernoulli; pub mod categorical; pub mod gaussian; pub mod 
multinomial;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::linalg::basic::arrays::Array;
+    use crate::linalg::basic::matrix::DenseMatrix;
+    use num_traits::float::Float;
+
+    type Model<'d> = BaseNaiveBayes<i32, i32, DenseMatrix<i32>, Vec<i32>, TestDistribution<'d>>;
+
+    #[derive(Debug, PartialEq, Clone)]
+    struct TestDistribution<'d>(&'d Vec<i32>);
+
+    impl NBDistribution<i32, i32> for TestDistribution<'_> {
+        fn prior(&self, _class_index: usize) -> f64 {
+            1.
+        }
+
+        fn log_likelihood<'a>(
+            &'a self,
+            class_index: usize,
+            _j: &'a Box<dyn ArrayView1<i32> + 'a>,
+        ) -> f64 {
+            match self.0.get(class_index) {
+                &v @ 2 | &v @ 10 | &v @ 20 => v as f64,
+                _ => f64::nan(),
+            }
+        }
+
+        fn classes(&self) -> &Vec<i32> {
+            self.0
+        }
+    }
+
+    #[test]
+    fn test_predict() {
+        let matrix = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap();
+
+        let val = vec![];
+        match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) {
+            Ok(_) => panic!("Should return error in case of empty classes"),
+            Err(err) => assert_eq!(
+                err.to_string(),
+                "Predict failed: Failed to predict, no classes available"
+            ),
+        }
+
+        let val = vec![1, 2, 3];
+        match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) {
+            Ok(r) => assert_eq!(r, vec![2, 2, 2]),
+            Err(_) => panic!("Should succeed in the normal case with NaNs"),
+        }
+
+        let val = vec![20, 2, 10];
+        match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) {
+            Ok(r) => assert_eq!(r, vec![20, 20, 20]),
+            Err(_) => panic!("Should succeed in the normal case without NaNs"),
+        }
+    }
+}
diff --git a/src/naive_bayes/multinomial.rs b/src/naive_bayes/multinomial.rs
index a340c40d..e00965ed 100644
--- a/src/naive_bayes/multinomial.rs
+++ b/src/naive_bayes/multinomial.rs
@@ -20,13 +20,13 @@
 //! &[0, 2, 0, 0, 1, 0],
 //! &[0, 1, 0, 1, 0, 0],
 //! &[0, 1, 1, 0, 0, 1],
-//! ]);
+//! ]).unwrap();
 //! let y: Vec<u32> = vec![0, 0, 0, 1];
 //! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();
 //!
 //! // Testing data point is:
 //! // Chinese Chinese Chinese Tokyo Japan
-//! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]);
+//! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap();
 //! let y_hat = nb.predict(&x_test).unwrap();
 //! ```
 //!
@@ -208,7 +208,7 @@
     /// * `x` - training data.
     /// * `y` - vector with target values (classes) of length N.
     /// * `priors` - Optional vector with prior probabilities of the classes. If not defined,
-    /// priors are adjusted according to the data.
+    ///   priors are adjusted according to the data.
     /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter.
     pub fn fit<X: Array2<T>, Y: Array1<TY>>(
         x: &X,
@@ -345,10 +345,10 @@
     /// Fits MultinomialNB with given data
     /// * `x` - training data of size NxM where N is the number of samples and M is the number of
-    /// features.
+    ///   features.
     /// * `y` - vector with target values (classes) of length N.
     /// * `parameters` - additional parameters like class priors, alpha for smoothing and
-    /// binarizing threshold.
+    ///   binarizing threshold.
     pub fn fit(x: &X, y: &Y, parameters: MultinomialNBParameters) -> Result<Self, Failed> {
         let distribution =
             MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?;
@@ -358,6 +358,7 @@
     /// Estimates the class labels for the provided data.
     /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features.
+    ///
     /// Returns a vector of size N with class estimates.
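+    ///
+    /// A minimal sketch (mirroring the module-level example above, where each row
+    /// of the input holds per-feature event counts, e.g. word counts):
+    ///
+    /// `let y_hat = nb.predict(&x_test).unwrap();`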
pub fn predict(&self, x: &X) -> Result { self.inner.as_ref().unwrap().predict(x) @@ -433,7 +434,8 @@ mod tests { &[0, 2, 0, 0, 1, 0], &[0, 1, 0, 1, 0, 0], &[0, 1, 1, 0, 0, 1], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 1]; let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); @@ -467,7 +469,7 @@ mod tests { // Testing data point is: // Chinese Chinese Chinese Tokyo Japan - let x_test = DenseMatrix::::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]); + let x_test = DenseMatrix::::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap(); let y_hat = mnb.predict(&x_test).unwrap(); assert_eq!(y_hat, &[0]); @@ -495,7 +497,8 @@ mod tests { &[2, 0, 3, 3, 1, 2, 0, 2, 4, 1], &[2, 4, 0, 4, 2, 4, 1, 3, 1, 4], &[0, 2, 2, 3, 4, 0, 4, 4, 4, 4], - ]); + ]) + .unwrap(); let y: Vec = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2]; let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); @@ -554,7 +557,8 @@ mod tests { &[0, 1, 0, 0, 1, 0], &[0, 1, 0, 1, 0, 0], &[0, 1, 1, 0, 0, 1], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 1]; let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index a70a12ec..137143e0 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -22,7 +22,7 @@ //! &[3., 4.], //! &[5., 6.], //! &[7., 8.], -//! &[9., 10.]]); +//! &[9., 10.]]).unwrap(); //! let y = vec![2, 2, 2, 3, 3]; //your class labels //! //! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap(); @@ -211,7 +211,7 @@ impl, Y: Array1, D: Distance, Y: Array1, D: Distance Result { let mut result = Y::zeros(x.shape().0); @@ -311,7 +312,8 @@ mod tests { #[test] fn knn_fit_predict() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y = vec![2, 2, 2, 3, 3]; let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap(); let y_hat = knn.predict(&x).unwrap(); @@ -325,7 +327,7 @@ mod tests { )] #[test] fn knn_fit_predict_weighted() { - let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]); + let x = DenseMatrix::from_2d_array(&[&[1.], &[2.], &[3.], &[4.], &[5.]]).unwrap(); let y = vec![2, 2, 2, 3, 3]; let knn = KNNClassifier::fit( &x, @@ -336,7 +338,9 @@ mod tests { .with_weight(KNNWeightFunction::Distance), ) .unwrap(); - let y_hat = knn.predict(&DenseMatrix::from_2d_array(&[&[4.1]])).unwrap(); + let y_hat = knn + .predict(&DenseMatrix::from_2d_array(&[&[4.1]]).unwrap()) + .unwrap(); assert_eq!(vec![3], y_hat); } @@ -348,7 +352,8 @@ mod tests { #[cfg(feature = "serde")] fn serde() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y = vec![2, 2, 2, 3, 3]; let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs index 914f810e..b49743f8 100644 --- a/src/neighbors/knn_regressor.rs +++ b/src/neighbors/knn_regressor.rs @@ -24,7 +24,7 @@ //! &[2., 2.], //! &[3., 3.], //! &[4., 4.], -//! &[5., 5.]]); +//! &[5., 5.]]).unwrap(); //! let y = vec![1., 2., 3., 4., 5.]; //your target values //! //! 
let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); @@ -88,25 +88,21 @@ pub struct KNNRegressor, Y: Array1, D: impl, Y: Array1, D: Distance>> KNNRegressor { - /// fn y(&self) -> &Y { self.y.as_ref().unwrap() } - /// fn knn_algorithm(&self) -> &KNNAlgorithm { self.knn_algorithm .as_ref() .expect("Missing parameter: KNNAlgorithm") } - /// fn weight(&self) -> &KNNWeightFunction { self.weight.as_ref().expect("Missing parameter: weight") } #[allow(dead_code)] - /// fn k(&self) -> usize { self.k.unwrap() } @@ -207,7 +203,7 @@ impl, Y: Array1, D: Distance>> { /// Fits KNN regressor to a NxM matrix where N is number of samples and M is number of features. /// * `x` - training data - /// * `y` - vector with real values + /// * `y` - vector with real values /// * `parameters` - additional parameters like search algorithm and k pub fn fit( x: &X, @@ -250,6 +246,7 @@ impl, Y: Array1, D: Distance>> /// Predict the target for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with estimates. pub fn predict(&self, x: &X) -> Result { let mut result = Y::zeros(x.shape().0); @@ -295,9 +292,10 @@ mod tests { #[test] fn knn_fit_predict_weighted() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y: Vec = vec![1., 2., 3., 4., 5.]; - let y_exp = vec![1., 2., 3., 4., 5.]; + let y_exp = [1., 2., 3., 4., 5.]; let knn = KNNRegressor::fit( &x, &y, @@ -311,7 +309,7 @@ mod tests { let y_hat = knn.predict(&x).unwrap(); assert_eq!(5, Vec::len(&y_hat)); for i in 0..y_hat.len() { - assert!((y_hat[i] - y_exp[i]).abs() < std::f64::EPSILON); + assert!((y_hat[i] - y_exp[i]).abs() < f64::EPSILON); } } @@ -322,9 +320,10 @@ mod tests { #[test] fn knn_fit_predict_uniform() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y: Vec = vec![1., 2., 3., 4., 5.]; - let y_exp = vec![2., 2., 3., 4., 4.]; + let y_exp = [2., 2., 3., 4., 4.]; let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); let y_hat = knn.predict(&x).unwrap(); assert_eq!(5, Vec::len(&y_hat)); @@ -341,7 +340,8 @@ mod tests { #[cfg(feature = "serde")] fn serde() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y = vec![1., 2., 3., 4., 5.]; let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs index 9cc78f0c..0be7222f 100644 --- a/src/optimization/first_order/gradient_descent.rs +++ b/src/optimization/first_order/gradient_descent.rs @@ -1,5 +1,3 @@ -// TODO: missing documentation - use std::default::Default; use crate::linalg::basic::arrays::Array1; @@ -8,30 +6,27 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -/// +/// Gradient Descent optimization algorithm pub struct GradientDescent { - /// + /// Maximum number of iterations pub max_iter: usize, - /// + /// Relative tolerance for the gradient norm pub g_rtol: f64, - /// + /// Absolute 
tolerance for the gradient norm pub g_atol: f64, } -/// impl Default for GradientDescent { fn default() -> Self { GradientDescent { max_iter: 10000, - g_rtol: std::f64::EPSILON.sqrt(), - g_atol: std::f64::EPSILON, + g_rtol: f64::EPSILON.sqrt(), + g_atol: f64::EPSILON, } } } -/// impl FirstOrderOptimizer for GradientDescent { - /// fn optimize<'a, X: Array1, LS: LineSearchMethod>( &self, f: &'a F<'_, T, X>, diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs index 81e7b640..b4f6c9f1 100644 --- a/src/optimization/first_order/lbfgs.rs +++ b/src/optimization/first_order/lbfgs.rs @@ -11,31 +11,29 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -/// +/// Limited-memory BFGS optimization algorithm pub struct LBFGS { - /// + /// Maximum number of iterations pub max_iter: usize, - /// + /// TODO: Add documentation pub g_rtol: f64, - /// + /// TODO: Add documentation pub g_atol: f64, - /// + /// TODO: Add documentation pub x_atol: f64, - /// + /// TODO: Add documentation pub x_rtol: f64, - /// + /// TODO: Add documentation pub f_abstol: f64, - /// + /// TODO: Add documentation pub f_reltol: f64, - /// + /// TODO: Add documentation pub successive_f_tol: usize, - /// + /// TODO: Add documentation pub m: usize, } -/// impl Default for LBFGS { - /// fn default() -> Self { LBFGS { max_iter: 1000, @@ -51,9 +49,7 @@ impl Default for LBFGS { } } -/// impl LBFGS { - /// fn two_loops>(&self, state: &mut LBFGSState) { let lower = state.iteration.max(self.m) - self.m; let upper = state.iteration; @@ -95,7 +91,6 @@ impl LBFGS { state.s.mul_scalar_mut(-T::one()); } - /// fn init_state>(&self, x: &X) -> LBFGSState { LBFGSState { x: x.clone(), @@ -119,7 +114,6 @@ impl LBFGS { } } - /// fn update_state<'a, T: FloatNumber + RealNumber, X: Array1, LS: LineSearchMethod>( &self, f: &'a F<'_, T, X>, @@ -161,7 +155,6 @@ impl LBFGS { df(&mut state.x_df, &state.x); } - /// fn assess_convergence>( &self, state: &mut LBFGSState, @@ -173,7 +166,7 @@ impl LBFGS { } if state.x.max_diff(&state.x_prev) - <= T::from_f64(self.x_rtol * state.x.norm(std::f64::INFINITY)).unwrap() + <= T::from_f64(self.x_rtol * state.x.norm(f64::INFINITY)).unwrap() { x_converged = true; } @@ -188,14 +181,13 @@ impl LBFGS { state.counter_f_tol += 1; } - if state.x_df.norm(std::f64::INFINITY) <= self.g_atol { + if state.x_df.norm(f64::INFINITY) <= self.g_atol { g_converged = true; } g_converged || x_converged || state.counter_f_tol > self.successive_f_tol } - /// fn update_hessian>( &self, _: &DF<'_, X>, @@ -212,7 +204,6 @@ impl LBFGS { } } -/// #[derive(Debug)] struct LBFGSState> { x: X, @@ -234,9 +225,7 @@ struct LBFGSState> { alpha: T, } -/// impl FirstOrderOptimizer for LBFGS { - /// fn optimize<'a, X: Array1, LS: LineSearchMethod>( &self, f: &F<'_, T, X>, @@ -248,7 +237,7 @@ impl FirstOrderOptimizer for LBFGS { df(&mut state.x_df, x0); - let g_converged = state.x_df.norm(std::f64::INFINITY) < self.g_atol; + let g_converged = state.x_df.norm(f64::INFINITY) < self.g_atol; let mut converged = g_converged; let stopped = false; @@ -299,7 +288,7 @@ mod tests { let result = optimizer.optimize(&f, &df, &x0, &ls); - assert!((result.f_x - 0.0).abs() < std::f64::EPSILON); + assert!((result.f_x - 0.0).abs() < f64::EPSILON); assert!((result.x[0] - 1.0).abs() < 1e-8); assert!((result.x[1] - 1.0).abs() < 1e-8); assert!(result.iterations <= 24); diff --git a/src/optimization/first_order/mod.rs 
b/src/optimization/first_order/mod.rs index 910be275..cf7e4f91 100644 --- a/src/optimization/first_order/mod.rs +++ b/src/optimization/first_order/mod.rs @@ -1,6 +1,6 @@ -/// +/// Gradient descent optimization algorithm pub mod gradient_descent; -/// +/// Limited-memory BFGS optimization algorithm pub mod lbfgs; use std::clone::Clone; @@ -11,9 +11,9 @@ use crate::numbers::floatnum::FloatNumber; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -/// +/// First-order optimization is a class of algorithms that use the first derivative of a function to find optimal solutions. pub trait FirstOrderOptimizer { - /// + /// run first order optimization fn optimize<'a, X: Array1, LS: LineSearchMethod>( &self, f: &F<'_, T, X>, @@ -23,13 +23,13 @@ pub trait FirstOrderOptimizer { ) -> OptimizerResult; } -/// +/// Result of optimization #[derive(Debug, Clone)] pub struct OptimizerResult> { - /// + /// Solution pub x: X, - /// + /// f(x) value pub f_x: T, - /// + /// number of iterations pub iterations: usize, } diff --git a/src/optimization/line_search.rs b/src/optimization/line_search.rs index 9a2656cd..8357d8da 100644 --- a/src/optimization/line_search.rs +++ b/src/optimization/line_search.rs @@ -1,11 +1,9 @@ -// TODO: missing documentation - use crate::optimization::FunctionOrder; use num_traits::Float; -/// +/// Line search optimization. pub trait LineSearchMethod { - /// + /// Find alpha that satisfies strong Wolfe conditions. fn search( &self, f: &(dyn Fn(T) -> T), @@ -16,32 +14,31 @@ pub trait LineSearchMethod { ) -> LineSearchResult; } -/// +/// Line search result #[derive(Debug, Clone)] pub struct LineSearchResult { - /// + /// Alpha value pub alpha: T, - /// + /// f(alpha) value pub f_x: T, } -/// +/// Backtracking line search method. pub struct Backtracking { - /// + /// TODO: Add documentation pub c1: T, - /// + /// Maximum number of iterations for Backtracking single run pub max_iterations: usize, - /// + /// TODO: Add documentation pub max_infinity_iterations: usize, - /// + /// TODO: Add documentation pub phi: T, - /// + /// TODO: Add documentation pub plo: T, - /// + /// function order pub order: FunctionOrder, } -/// impl Default for Backtracking { fn default() -> Self { Backtracking { @@ -55,9 +52,7 @@ impl Default for Backtracking { } } -/// impl LineSearchMethod for Backtracking { - /// fn search( &self, f: &(dyn Fn(T) -> T), diff --git a/src/optimization/mod.rs b/src/optimization/mod.rs index 2f6c41a2..83ca2493 100644 --- a/src/optimization/mod.rs +++ b/src/optimization/mod.rs @@ -1,21 +1,19 @@ -// TODO: missing documentation - -/// +/// first order optimization algorithms pub mod first_order; -/// +/// line search algorithms pub mod line_search; -/// +/// Function f(x) = y pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a; -/// +/// Function df(x) pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a; -/// +/// Function order #[allow(clippy::upper_case_acronyms)] #[derive(Debug, PartialEq, Eq)] pub enum FunctionOrder { - /// + /// Second order SECOND, - /// + /// Third order THIRD, } diff --git a/src/preprocessing/categorical.rs b/src/preprocessing/categorical.rs index 933d7c2b..e17dea78 100644 --- a/src/preprocessing/categorical.rs +++ b/src/preprocessing/categorical.rs @@ -12,7 +12,7 @@ //! &[1.5, 2.0, 1.5, 4.0], //! &[1.5, 1.0, 1.5, 5.0], //! &[1.5, 2.0, 1.5, 6.0], -//! ]); +//! ]).unwrap(); //! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]); //! 
// Infer number of categories from data and return a reusable encoder //! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap(); @@ -240,14 +240,16 @@ mod tests { &[2.0, 1.5, 4.0], &[1.0, 1.5, 5.0], &[2.0, 1.5, 6.0], - ]); + ]) + .unwrap(); let oh_enc = DenseMatrix::from_2d_array(&[ &[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0], &[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0], &[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0], &[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0], - ]); + ]) + .unwrap(); (orig, oh_enc) } @@ -259,14 +261,16 @@ mod tests { &[1.5, 2.0, 1.5, 4.0], &[1.5, 1.0, 1.5, 5.0], &[1.5, 2.0, 1.5, 6.0], - ]); + ]) + .unwrap(); let oh_enc = DenseMatrix::from_2d_array(&[ &[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0], &[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0], &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0], &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0], - ]); + ]) + .unwrap(); (orig, oh_enc) } @@ -277,7 +281,7 @@ mod tests { )] #[test] fn hash_encode_f64_series() { - let series = vec![3.0, 1.0, 2.0, 1.0]; + let series = [3.0, 1.0, 2.0, 1.0]; let hashable_series: Vec = series.iter().map(|v| v.to_category()).collect(); let enc = CategoryMapper::from_positional_category_vec(hashable_series); @@ -334,7 +338,8 @@ mod tests { &[2.0, 1.5, 4.0], &[1.0, 1.5, 5.0], &[2.0, 1.5, 6.0], - ]); + ]) + .unwrap(); let params = OneHotEncoderParams::from_cat_idx(&[1]); let result = OneHotEncoder::fit(&m, params); diff --git a/src/preprocessing/numerical.rs b/src/preprocessing/numerical.rs index c673731b..674f6814 100644 --- a/src/preprocessing/numerical.rs +++ b/src/preprocessing/numerical.rs @@ -11,7 +11,7 @@ //! vec![0.0, 0.0], //! vec![1.0, 1.0], //! vec![1.0, 1.0], -//! ]); +//! ]).unwrap(); //! //! let standard_scaler = //! numerical::StandardScaler::fit(&data, numerical::StandardScalerParameters::default()) @@ -24,7 +24,7 @@ //! vec![-1.0, -1.0], //! vec![1.0, 1.0], //! vec![1.0, 1.0], -//! ]) +//! ]).unwrap() //! ); //! ``` use std::marker::PhantomData; @@ -172,18 +172,14 @@ where T: Number + RealNumber, M: Array2, { - if let Some(output_matrix) = columns.first().cloned() { - return Some( - columns - .iter() - .skip(1) - .fold(output_matrix, |current_matrix, new_colum| { - current_matrix.h_stack(new_colum) - }), - ); - } else { - None - } + columns.first().cloned().map(|output_matrix| { + columns + .iter() + .skip(1) + .fold(output_matrix, |current_matrix, new_colum| { + current_matrix.h_stack(new_colum) + }) + }) } #[cfg(test)] @@ -197,15 +193,18 @@ mod tests { fn combine_three_columns() { assert_eq!( build_matrix_from_columns(vec![ - DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]), - DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]), - DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],]) + DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]).unwrap(), + DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]).unwrap(), + DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],]).unwrap() ]), - Some(DenseMatrix::from_2d_vec(&vec![ - vec![1.0, 2.0, 3.0], - vec![1.0, 2.0, 3.0], - vec![1.0, 2.0, 3.0] - ])) + Some( + DenseMatrix::from_2d_vec(&vec![ + vec![1.0, 2.0, 3.0], + vec![1.0, 2.0, 3.0], + vec![1.0, 2.0, 3.0] + ]) + .unwrap() + ) ) } @@ -287,13 +286,15 @@ mod tests { /// sklearn. 
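+    ///
+    /// As a clarifying note: the scaler standardizes each column as
+    /// z = (x - mean) / std, so the assertion below compares the transformed
+    /// matrix against the reference values via `approximate_eq`.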
#[test] fn fit_transform_random_values() { - let transformed_values = - fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[ + let transformed_values = fit_transform_with_default_standard_scaler( + &DenseMatrix::from_2d_array(&[ &[0.1004222429, 0.2194113576, 0.9310663354, 0.3313593793], &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], - ])); + ]) + .unwrap(), + ); println!("{transformed_values}"); assert!(transformed_values.approximate_eq( &DenseMatrix::from_2d_array(&[ @@ -301,7 +302,8 @@ mod tests { &[-0.7615464283, -0.7076698384, -1.1075452562, 1.2632979631], &[0.4832504303, -0.6106747444, 1.0630075435, 0.5494084257], &[1.3936980634, 1.7215431158, -0.8839228078, -1.3855590021], - ]), + ]) + .unwrap(), 1.0 )) } @@ -310,13 +312,10 @@ mod tests { #[test] fn fit_transform_with_zero_variance() { assert_eq!( - fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[ - &[1.0], - &[1.0], - &[1.0], - &[1.0] - ])), - DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]), + fit_transform_with_default_standard_scaler( + &DenseMatrix::from_2d_array(&[&[1.0], &[1.0], &[1.0], &[1.0]]).unwrap() + ), + DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]).unwrap(), "When scaling values with zero variance, zero is expected as return value" ) } @@ -331,7 +330,8 @@ mod tests { &[1.0, 2.0, 5.0], &[1.0, 1.0, 1.0], &[1.0, 2.0, 5.0] - ]), + ]) + .unwrap(), StandardScalerParameters::default(), ), Ok(StandardScaler { @@ -354,7 +354,8 @@ mod tests { &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], - ]), + ]) + .unwrap(), StandardScalerParameters::default(), ) .unwrap(); @@ -364,17 +365,18 @@ mod tests { vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625], ); - assert!( - &DenseMatrix::::from_2d_vec(&vec![fitted_scaler.stds]).approximate_eq( + assert!(&DenseMatrix::::from_2d_vec(&vec![fitted_scaler.stds]) + .unwrap() + .approximate_eq( &DenseMatrix::from_2d_array(&[&[ 0.29426447500954, 0.16758497615485, 0.20820945786863, 0.23329718831165 - ],]), + ],]) + .unwrap(), 0.00000000000001 - ) - ) + )) } /// If `with_std` is set to `false` the values should not be @@ -392,8 +394,9 @@ mod tests { }; assert_eq!( - standard_scaler.transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]])), - Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]])) + standard_scaler + .transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]]).unwrap()), + Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]]).unwrap()) ) } @@ -413,8 +416,8 @@ mod tests { assert_eq!( standard_scaler - .transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]])), - Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]])) + .transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]]).unwrap()), + Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]]).unwrap()) ) } @@ -433,7 +436,8 @@ mod tests { &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], - ]), + ]) + .unwrap(), StandardScalerParameters::default(), ) .unwrap(); @@ -446,17 +450,18 @@ mod tests { vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625], ); - assert!( - 
&DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds]).approximate_eq( + assert!(&DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds]) + .unwrap() + .approximate_eq( &DenseMatrix::from_2d_array(&[&[ 0.29426447500954, 0.16758497615485, 0.20820945786863, 0.23329718831165 - ],]), + ],]) + .unwrap(), 0.00000000000001 - ) - ) + )) } } } diff --git a/src/readers/csv.rs b/src/readers/csv.rs index 730f2932..e9a88436 100644 --- a/src/readers/csv.rs +++ b/src/readers/csv.rs @@ -30,7 +30,7 @@ pub struct CSVDefinition<'a> { /// What seperates the fields in your csv-file? field_seperator: &'a str, } -impl<'a> Default for CSVDefinition<'a> { +impl Default for CSVDefinition<'_> { fn default() -> Self { Self { n_rows_header: 1, @@ -83,7 +83,7 @@ where Matrix: Array2, { let csv_text = read_string_from_source(source)?; - let rows: Vec> = extract_row_vectors_from_csv_text::( + let rows: Vec> = extract_row_vectors_from_csv_text( &csv_text, &definition, detect_row_format(&csv_text, &definition)?, @@ -103,12 +103,7 @@ where /// Given a string containing the contents of a csv file, extract its value /// into row-vectors. -fn extract_row_vectors_from_csv_text< - 'a, - T: Number + RealNumber + std::str::FromStr, - RowVector: Array1, - Matrix: Array2, ->( +fn extract_row_vectors_from_csv_text<'a, T: Number + RealNumber + std::str::FromStr>( csv_text: &'a str, definition: &'a CSVDefinition<'_>, row_format: CSVRowFormat<'_>, @@ -243,7 +238,8 @@ mod tests { &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], &[4.7, 3.2, 1.3, 0.2], - ])) + ]) + .unwrap()) ) } #[test] @@ -266,7 +262,7 @@ mod tests { &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], &[4.7, 3.2, 1.3, 0.2], - ])) + ]).unwrap()) ) } #[test] @@ -305,12 +301,11 @@ mod tests { } mod extract_row_vectors_from_csv_text { use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat}; - use crate::linalg::basic::matrix::DenseMatrix; #[test] fn read_default_csv() { assert_eq!( - extract_row_vectors_from_csv_text::, DenseMatrix<_>>( + extract_row_vectors_from_csv_text::( "column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0", &CSVDefinition::default(), CSVRowFormat { diff --git a/src/svm/mod.rs b/src/svm/mod.rs index b2bd79cb..f6baf8bb 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -56,7 +56,7 @@ pub struct Kernels; impl Kernels { /// Return a default linear pub fn linear() -> LinearKernel { - LinearKernel::default() + LinearKernel } /// Return a default RBF pub fn rbf() -> RBFKernel { @@ -292,7 +292,7 @@ mod tests { .unwrap() .abs(); - assert!((4913f64 - result) < std::f64::EPSILON); + assert!((4913f64 - result).abs() < f64::EPSILON); } #[cfg_attr( diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 131f44c2..cc5a0beb 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -53,7 +53,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y = vec![ -1, -1, -1, -1, -1, -1, -1, -1, //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; //! 
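+//! // A minimal sketch of the next step (mirroring the tests below; `params` is
+//! // assumed to be an `SVCParameters` value configured for this data):
+//! let svc = SVC::fit(&x, &y, &params).unwrap();
+//! let y_hat = svc.predict(&x).unwrap();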
@@ -322,19 +322,26 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array let (n, _) = x.shape(); let mut y_hat: Vec = Array1::zeros(n); + let mut row = Vec::with_capacity(n); for i in 0..n { - let row_pred: TX = - self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)); + row.clear(); + row.extend(x.get_row(i).iterator(0).copied()); + let row_pred: TX = self.predict_for_row(&row); y_hat.set(i, row_pred); } Ok(y_hat) } - fn predict_for_row(&self, x: Vec) -> TX { + fn predict_for_row(&self, x: &[TX]) -> TX { let mut f = self.b.unwrap(); + let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.instances.as_ref().unwrap().len() { + let xj: Vec<_> = self.instances.as_ref().unwrap()[i] + .iter() + .map(|e| e.to_f64().unwrap()) + .collect(); f += self.w.as_ref().unwrap()[i] * TX::from( self.parameters @@ -343,13 +350,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array .kernel .as_ref() .unwrap() - .apply( - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.instances.as_ref().unwrap()[i] - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), - ) + .apply(&xi, &xj) .unwrap(), ) .unwrap(); @@ -359,8 +360,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array } } -impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1> PartialEq - for SVC<'a, TX, TY, X, Y> +impl, Y: Array1> PartialEq + for SVC<'_, TX, TY, X, Y> { fn eq(&self, other: &Self) -> bool { if (self.b.unwrap().sub(other.b.unwrap())).abs() > TX::epsilon() * TX::two() @@ -472,14 +473,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let tol = self.parameters.tol; let good_enough = TX::from_i32(1000).unwrap(); + let mut x = Vec::with_capacity(n); for _ in 0..self.parameters.epoch { for i in self.permutate(n) { - self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - &mut cache, - ); + x.clear(); + x.extend(self.x.get_row(i).iterator(0).take(n).copied()); + self.process(i, &x, *self.y.get(i), &mut cache); loop { self.reprocess(tol, &mut cache); self.find_min_max_gradient(); @@ -511,24 +510,17 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let mut cp = 0; let mut cn = 0; + let mut x = Vec::with_capacity(n); for i in self.permutate(n) { + x.clear(); + x.extend(self.x.get_row(i).iterator(0).take(n).copied()); if *self.y.get(i) == TY::one() && cp < few { - if self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - cache, - ) { + if self.process(i, &x, *self.y.get(i), cache) { cp += 1; } } else if *self.y.get(i) == TY::from(-1).unwrap() && cn < few - && self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - cache, - ) + && self.process(i, &x, *self.y.get(i), cache) { cn += 1; } @@ -539,7 +531,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } - fn process(&mut self, i: usize, x: Vec, y: TY, cache: &mut Cache) -> bool { + fn process(&mut self, i: usize, x: &[TX], y: TY, cache: &mut Cache) -> bool { for j in 0..self.sv.len() { if self.sv[j].index == i { return true; @@ -551,15 +543,14 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let mut cache_values: Vec<((usize, usize), TX)> = Vec::new(); for v in self.sv.iter() { + let xi: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); + let xj: Vec<_> = x.iter().map(|e| 
e.to_f64().unwrap()).collect(); let k = self .parameters .kernel .as_ref() .unwrap() - .apply( - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(); cache_values.push(((i, v.index), TX::from(k).unwrap())); g -= v.alpha * k; @@ -578,7 +569,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 cache.insert(v.0, v.1.to_f64().unwrap()); } - let x_f64 = x.iter().map(|e| e.to_f64().unwrap()).collect(); + let x_f64: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); let k_v = self .parameters .kernel @@ -701,8 +692,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let km = sv1.k; let gm = sv1.grad; let mut best = 0f64; + let xi: Vec<_> = sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { let v = &self.sv[i]; + let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); let z = v.grad - gm; let k = cache.get( sv1, @@ -711,10 +704,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(), ); let mut curv = km + v.k - 2f64 * k; @@ -732,6 +722,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } + let xi: Vec<_> = self.sv[idx_1] + .x + .iter() + .map(|e| e.to_f64().unwrap()) + .collect::>(); + idx_2.map(|idx_2| { ( idx_1, @@ -742,16 +738,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .as_ref() .unwrap() .apply( - &self.sv[idx_1] - .x - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), + &xi, &self.sv[idx_2] .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), ) .unwrap() }), @@ -765,8 +757,11 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let km = sv2.k; let gm = sv2.grad; let mut best = 0f64; + + let xi: Vec<_> = sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { let v = &self.sv[i]; + let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); let z = gm - v.grad; let k = cache.get( sv2, @@ -775,10 +770,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(), ); let mut curv = km + v.k - 2f64 * k; @@ -797,6 +789,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } + let xj: Vec<_> = self.sv[idx_2] + .x + .iter() + .map(|e| e.to_f64().unwrap()) + .collect(); + idx_1.map(|idx_1| { ( idx_1, @@ -811,12 +809,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), - &self.sv[idx_2] - .x - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), + &xj, ) .unwrap() }), @@ -835,12 +829,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), &self.sv[idx_2] .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), ) .unwrap(), )), @@ -895,7 +889,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 self.sv[v1].alpha -= step.to_f64().unwrap(); self.sv[v2].alpha += step.to_f64().unwrap(); + let xi_v1: Vec<_> = 
self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(); + let xi_v2: Vec<_> = self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { + let xj: Vec<_> = self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(); let k2 = cache.get( &self.sv[v2], &self.sv[i], @@ -903,10 +900,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi_v2, &xj) .unwrap(), ); let k1 = cache.get( @@ -916,10 +910,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi_v1, &xj) .unwrap(), ); self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1); @@ -966,7 +957,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![ -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -992,7 +984,8 @@ mod tests { )] #[test] fn svc_fit_decision_function() { - let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]]); + let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]]) + .unwrap(); let x2 = DenseMatrix::from_2d_array(&[ &[3.0, 3.0], @@ -1001,7 +994,8 @@ mod tests { &[10.0, 10.0], &[1.0, 1.0], &[0.0, 0.0], - ]); + ]) + .unwrap(); let y: Vec = vec![-1, -1, 1, 1]; @@ -1054,7 +1048,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -1103,7 +1098,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![ -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -1114,7 +1110,7 @@ mod tests { let svc = SVC::fit(&x, &y, ¶ms).unwrap(); // serialization - let deserialized_svc: SVC = + let deserialized_svc: SVC<'_, f64, i32, _, _> = serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap(); assert_eq!(svc, deserialized_svc); diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 6fbd15be..4ce0aa28 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -44,7 +44,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 
100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -248,19 +248,20 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' let mut y_hat: Vec = Vec::::zeros(n); + let mut x_i = Vec::with_capacity(n); for i in 0..n { - y_hat.set( - i, - self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)), - ); + x_i.clear(); + x_i.extend(x.get_row(i).iterator(0).copied()); + y_hat.set(i, self.predict_for_row(&x_i)); } Ok(y_hat) } - pub(crate) fn predict_for_row(&self, x: Vec) -> T { + pub(crate) fn predict_for_row(&self, x: &[T]) -> T { let mut f = self.b; + let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.instances.as_ref().unwrap().len() { f += self.w.as_ref().unwrap()[i] * T::from( @@ -270,10 +271,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' .kernel .as_ref() .unwrap() - .apply( - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.instances.as_ref().unwrap()[i], - ) + .apply(&xi, &self.instances.as_ref().unwrap()[i]) .unwrap(), ) .unwrap() @@ -283,8 +281,8 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' } } -impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> PartialEq - for SVR<'a, T, X, Y> +impl, Y: Array1> PartialEq + for SVR<'_, T, X, Y> { fn eq(&self, other: &Self) -> bool { if (self.b - other.b).abs() > T::epsilon() * T::two() @@ -642,7 +640,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, @@ -690,7 +689,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, @@ -702,7 +702,7 @@ mod tests { let svr = SVR::fit(&x, &y, ¶ms).unwrap(); - let deserialized_svr: SVR, _> = + let deserialized_svr: SVR<'_, f64, DenseMatrix, _> = serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap(); assert_eq!(svr, deserialized_svr); diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 4f36e5b9..5679516a 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -48,7 +48,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y = vec![ 0, 0, 0, 0, 0, 0, 0, 0, //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; //! 
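+//! // A minimal sketch of the next steps (the tests below use the same calls):
+//! let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
+//! let y_hat = tree.predict(&x).unwrap();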
@@ -77,7 +77,9 @@ use serde::{Deserialize, Serialize}; use crate::api::{Predictor, SupervisedEstimator}; use crate::error::Failed; +use crate::linalg::basic::arrays::MutArray; use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1}; +use crate::linalg::basic::matrix::DenseMatrix; use crate::numbers::basenum::Number; use crate::rand_custom::get_rng_impl; @@ -116,6 +118,7 @@ pub struct DecisionTreeClassifier< num_classes: usize, classes: Vec, depth: u16, + num_features: usize, _phantom_tx: PhantomData, _phantom_x: PhantomData, _phantom_y: PhantomData, @@ -159,11 +162,13 @@ pub enum SplitCriterion { #[derive(Debug, Clone)] struct Node { output: usize, + n_node_samples: usize, split_feature: usize, split_value: Option, split_score: Option, true_child: Option, false_child: Option, + impurity: Option, } impl, Y: Array1> PartialEq @@ -194,12 +199,12 @@ impl PartialEq for Node { self.output == other.output && self.split_feature == other.split_feature && match (self.split_value, other.split_value) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } && match (self.split_score, other.split_score) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } @@ -400,14 +405,16 @@ impl Default for DecisionTreeClassifierSearchParameters { } impl Node { - fn new(output: usize) -> Self { + fn new(output: usize, n_node_samples: usize) -> Self { Node { output, + n_node_samples, split_feature: 0, split_value: Option::None, split_score: Option::None, true_child: Option::None, false_child: Option::None, + impurity: Option::None, } } } @@ -507,6 +514,7 @@ impl, Y: Array1> num_classes: 0usize, classes: vec![], depth: 0u16, + num_features: 0usize, _phantom_tx: PhantomData, _phantom_x: PhantomData, _phantom_y: PhantomData, @@ -578,7 +586,7 @@ impl, Y: Array1> count[yi[i]] += samples[i]; } - let root = Node::new(which_max(&count)); + let root = Node::new(which_max(&count), y_ncols); change_nodes.push(root); let mut order: Vec> = Vec::new(); @@ -593,6 +601,7 @@ impl, Y: Array1> num_classes: k, classes, depth: 0u16, + num_features: num_attributes, _phantom_tx: PhantomData, _phantom_x: PhantomData, _phantom_y: PhantomData, @@ -606,7 +615,7 @@ impl, Y: Array1> visitor_queue.push_back(visitor); } - while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) { + while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) { match visitor_queue.pop_front() { Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng), None => break, @@ -643,7 +652,7 @@ impl, Y: Array1> if node.true_child.is_none() && node.false_child.is_none() { result = node.output; } else if x.get((row, node.split_feature)).to_f64().unwrap() - <= node.split_value.unwrap_or(std::f64::NAN) + <= node.split_value.unwrap_or(f64::NAN) { queue.push_back(node.true_child.unwrap()); } else { @@ -678,16 +687,7 @@ impl, Y: Array1> } } - if is_pure { - return false; - } - let n = visitor.samples.iter().sum(); - - if n <= self.parameters().min_samples_split { - return false; - } - let mut count = vec![0; self.num_classes]; let mut false_count = vec![0; self.num_classes]; for i in 0..n_rows { @@ -696,7 +696,15 @@ impl, Y: Array1> } } - let parent_impurity = impurity(&self.parameters().criterion, &count, n); + self.nodes[visitor.node].impurity = Some(impurity(&self.parameters().criterion, &count, n)); + + if is_pure { + return false; + } + + if n 
<= self.parameters().min_samples_split { + return false; + } let mut variables = (0..n_attr).collect::>(); @@ -705,14 +713,7 @@ impl, Y: Array1> } for variable in variables.iter().take(mtry) { - self.find_best_split( - visitor, - n, - &count, - &mut false_count, - parent_impurity, - *variable, - ); + self.find_best_split(visitor, n, &count, &mut false_count, *variable); } self.nodes()[visitor.node].split_score.is_some() @@ -724,7 +725,6 @@ impl, Y: Array1> n: usize, count: &[usize], false_count: &mut [usize], - parent_impurity: f64, j: usize, ) { let mut true_count = vec![0; self.num_classes]; @@ -760,6 +760,7 @@ impl, Y: Array1> let true_label = which_max(&true_count); let false_label = which_max(false_count); + let parent_impurity = self.nodes()[visitor.node].impurity.unwrap(); let gain = parent_impurity - tc as f64 / n as f64 * impurity(&self.parameters().criterion, &true_count, tc) @@ -804,9 +805,7 @@ impl, Y: Array1> .get((i, self.nodes()[visitor.node].split_feature)) .to_f64() .unwrap() - <= self.nodes()[visitor.node] - .split_value - .unwrap_or(std::f64::NAN) + <= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN) { *true_sample = visitor.samples[i]; tc += *true_sample; @@ -827,9 +826,9 @@ impl, Y: Array1> let true_child_idx = self.nodes().len(); - self.nodes.push(Node::new(visitor.true_child_output)); + self.nodes.push(Node::new(visitor.true_child_output, tc)); let false_child_idx = self.nodes().len(); - self.nodes.push(Node::new(visitor.false_child_output)); + self.nodes.push(Node::new(visitor.false_child_output, fc)); self.nodes[visitor.node].true_child = Some(true_child_idx); self.nodes[visitor.node].false_child = Some(false_child_idx); @@ -863,11 +862,104 @@ impl, Y: Array1> true } + + /// Compute feature importances for the fitted tree. + pub fn compute_feature_importances(&self, normalize: bool) -> Vec { + let mut importances = vec![0f64; self.num_features]; + + for node in self.nodes().iter() { + if node.true_child.is_none() && node.false_child.is_none() { + continue; + } + let left = &self.nodes()[node.true_child.unwrap()]; + let right = &self.nodes()[node.false_child.unwrap()]; + + importances[node.split_feature] += node.n_node_samples as f64 * node.impurity.unwrap() + - left.n_node_samples as f64 * left.impurity.unwrap() + - right.n_node_samples as f64 * right.impurity.unwrap(); + } + for item in importances.iter_mut() { + *item /= self.nodes()[0].n_node_samples as f64; + } + if normalize { + let sum = importances.iter().sum::(); + for importance in importances.iter_mut() { + *importance /= sum; + } + } + importances + } + + /// Predict class probabilities for the input samples. + /// + /// # Arguments + /// + /// * `x` - The input samples as a matrix where each row is a sample and each column is a feature. + /// + /// # Returns + /// + /// A `Result` containing a `DenseMatrix` where each row corresponds to a sample and each column + /// corresponds to a class. The values represent the probability of the sample belonging to each class. + /// + /// # Errors + /// + /// Returns an error if at least one row prediction process fails. + pub fn predict_proba(&self, x: &X) -> Result, Failed> { + let (n_samples, _) = x.shape(); + let n_classes = self.classes().len(); + let mut result = DenseMatrix::::zeros(n_samples, n_classes); + + for i in 0..n_samples { + let probs = self.predict_proba_for_row(x, i)?; + for (j, &prob) in probs.iter().enumerate() { + result.set((i, j), prob); + } + } + + Ok(result) + } + + /// Predict class probabilities for a single input sample. 
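+    ///
+    /// The walk is deterministic: the sample is routed down the tree until a leaf
+    /// is reached; the leaf's output class receives probability 1.0 and all other
+    /// classes receive 0.0 (a hard assignment, as implemented below).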
+ /// + /// # Arguments + /// + /// * `x` - The input matrix containing all samples. + /// * `row` - The index of the row in `x` for which to predict probabilities. + /// + /// # Returns + /// + /// A vector of probabilities, one for each class, representing the probability + /// of the input sample belonging to each class. + fn predict_proba_for_row(&self, x: &X, row: usize) -> Result, Failed> { + let mut node = 0; + + while let Some(current_node) = self.nodes().get(node) { + if current_node.true_child.is_none() && current_node.false_child.is_none() { + // Leaf node reached + let mut probs = vec![0.0; self.classes().len()]; + probs[current_node.output] = 1.0; + return Ok(probs); + } + + let split_feature = current_node.split_feature; + let split_value = current_node.split_value.unwrap_or(f64::NAN); + + if x.get((row, split_feature)).to_f64().unwrap() <= split_value { + node = current_node.true_child.unwrap(); + } else { + node = current_node.false_child.unwrap(); + } + } + + // This should never happen if the tree is properly constructed + Err(Failed::predict("Nodes iteration did not reach leaf")) + } } #[cfg(test)] mod tests { use super::*; + use crate::linalg::basic::arrays::Array; use crate::linalg::basic::matrix::DenseMatrix; #[test] @@ -899,17 +991,62 @@ mod tests { )] #[test] fn gini_impurity() { - assert!((impurity(&SplitCriterion::Gini, &[7, 3], 10) - 0.42).abs() < std::f64::EPSILON); + assert!((impurity(&SplitCriterion::Gini, &[7, 3], 10) - 0.42).abs() < f64::EPSILON); assert!( (impurity(&SplitCriterion::Entropy, &[7, 3], 10) - 0.8812908992306927).abs() - < std::f64::EPSILON + < f64::EPSILON ); assert!( (impurity(&SplitCriterion::ClassificationError, &[7, 3], 10) - 0.3).abs() - < std::f64::EPSILON + < f64::EPSILON ); } + #[cfg_attr( + all(target_arch = "wasm32", not(target_os = "wasi")), + wasm_bindgen_test::wasm_bindgen_test + )] + #[test] + fn test_predict_proba() { + let x: DenseMatrix = DenseMatrix::from_2d_array(&[ + &[5.1, 3.5, 1.4, 0.2], + &[4.9, 3.0, 1.4, 0.2], + &[4.7, 3.2, 1.3, 0.2], + &[4.6, 3.1, 1.5, 0.2], + &[5.0, 3.6, 1.4, 0.2], + &[7.0, 3.2, 4.7, 1.4], + &[6.4, 3.2, 4.5, 1.5], + &[6.9, 3.1, 4.9, 1.5], + &[5.5, 2.3, 4.0, 1.3], + &[6.5, 2.8, 4.6, 1.5], + ]) + .unwrap(); + let y: Vec = vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1]; + + let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); + let probabilities = tree.predict_proba(&x).unwrap(); + + assert_eq!(probabilities.shape(), (10, 2)); + + for row in 0..10 { + let row_sum: f64 = probabilities.get_row(row).sum(); + assert!( + (row_sum - 1.0).abs() < 1e-6, + "Row probabilities should sum to 1" + ); + } + + // Check if the first 5 samples have higher probability for class 0 + for i in 0..5 { + assert!(probabilities.get((i, 0)) > probabilities.get((i, 1))); + } + + // Check if the last 5 samples have higher probability for class 1 + for i in 5..10 { + assert!(probabilities.get((i, 1)) > probabilities.get((i, 0))); + } + } + #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test @@ -938,7 +1075,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; assert_eq!( @@ -1005,7 +1143,8 @@ mod tests { &[0., 0., 1., 1.], &[0., 0., 0., 0.], &[0., 0., 0., 1.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; assert_eq!( @@ -1016,6 +1155,43 @@ mod tests { ); } + #[test] + fn 
test_compute_feature_importances() { + let x: DenseMatrix = DenseMatrix::from_2d_array(&[ + &[1., 1., 1., 0.], + &[1., 1., 1., 0.], + &[1., 1., 1., 1.], + &[1., 1., 0., 0.], + &[1., 1., 0., 1.], + &[1., 0., 1., 0.], + &[1., 0., 1., 0.], + &[1., 0., 1., 1.], + &[1., 0., 0., 0.], + &[1., 0., 0., 1.], + &[0., 1., 1., 0.], + &[0., 1., 1., 0.], + &[0., 1., 1., 1.], + &[0., 1., 0., 0.], + &[0., 1., 0., 1.], + &[0., 0., 1., 0.], + &[0., 0., 1., 0.], + &[0., 0., 1., 1.], + &[0., 0., 0., 0.], + &[0., 0., 0., 1.], + ]) + .unwrap(); + let y: Vec = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; + let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); + assert_eq!( + tree.compute_feature_importances(false), + vec![0., 0., 0.21333333333333332, 0.26666666666666666] + ); + assert_eq!( + tree.compute_feature_importances(true), + vec![0., 0., 0.4444444444444444, 0.5555555555555556] + ); + } + #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test @@ -1044,7 +1220,8 @@ mod tests { &[0., 0., 1., 1.], &[0., 0., 0., 0.], &[0., 0., 0., 1.], - ]); + ]) + .unwrap(); let y = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index d21c7490..d735697d 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -39,7 +39,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, //! 
101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -311,15 +311,15 @@ impl Node { impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { - (self.output - other.output).abs() < std::f64::EPSILON + (self.output - other.output).abs() < f64::EPSILON && self.split_feature == other.split_feature && match (self.split_value, other.split_value) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } && match (self.split_score, other.split_score) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } @@ -478,7 +478,7 @@ impl, Y: Array1> visitor_queue.push_back(visitor); } - while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) { + while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) { match visitor_queue.pop_front() { Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng), None => break, @@ -515,7 +515,7 @@ impl, Y: Array1> if node.true_child.is_none() && node.false_child.is_none() { result = node.output; } else if x.get((row, node.split_feature)).to_f64().unwrap() - <= node.split_value.unwrap_or(std::f64::NAN) + <= node.split_value.unwrap_or(f64::NAN) { queue.push_back(node.true_child.unwrap()); } else { @@ -640,9 +640,7 @@ impl, Y: Array1> .get((i, self.nodes()[visitor.node].split_feature)) .to_f64() .unwrap() - <= self.nodes()[visitor.node] - .split_value - .unwrap_or(std::f64::NAN) + <= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN) { *true_sample = visitor.samples[i]; tc += *true_sample; @@ -753,7 +751,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -767,7 +766,7 @@ mod tests { assert!((y_hat[i] - y[i]).abs() < 0.1); } - let expected_y = vec![ + let expected_y = [ 87.3, 87.3, 87.3, 87.3, 98.9, 98.9, 98.9, 98.9, 98.9, 107.9, 107.9, 107.9, 114.85, 114.85, 114.85, 114.85, ]; @@ -788,7 +787,7 @@ mod tests { assert!((y_hat[i] - expected_y[i]).abs() < 0.1); } - let expected_y = vec![ + let expected_y = [ 83.0, 88.35, 88.35, 89.5, 97.15, 97.15, 99.5, 99.5, 101.2, 104.6, 109.6, 109.6, 113.4, 113.4, 116.30, 116.30, ]; @@ -834,7 +833,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, From be270c4570d7af83b3df99366e7b74f5936157cc Mon Sep 17 00:00:00 2001 From: Lorenzo Mec-iS Date: Wed, 22 Jan 2025 13:23:57 +0000 Subject: [PATCH 04/10] merge changes --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a30db160..b2e186a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "smartcore" description = "Machine Learning in Rust." 
From be270c4570d7af83b3df99366e7b74f5936157cc Mon Sep 17 00:00:00 2001
From: Lorenzo Mec-iS
Date: Wed, 22 Jan 2025 13:23:57 +0000
Subject: [PATCH 04/10] merge changes

---
 Cargo.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index a30db160..b2e186a5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 name = "smartcore"
 description = "Machine Learning in Rust."
 homepage = "https://smartcorelib.org"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["smartcore Developers"]
 edition = "2021"
 license = "Apache-2.0"
@@ -48,7 +48,7 @@ getrandom = { version = "0.2.8", optional = true }
 wasm-bindgen-test = "0.3"
 
 [dev-dependencies]
-itertools = "0.10.5"
+itertools = "0.13.0"
 serde_json = "1.0"
 bincode = "1.3.1"

From 3f6c334307d38a7ff3046da3808d0214e224a7e2 Mon Sep 17 00:00:00 2001
From: Lorenzo Mec-iS
Date: Wed, 22 Jan 2025 13:28:10 +0000
Subject: [PATCH 05/10] fix clippy

---
 src/naive_bayes/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs
index c30eabc9..d7d02e05 100644
--- a/src/naive_bayes/mod.rs
+++ b/src/naive_bayes/mod.rs
@@ -40,7 +40,7 @@ use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1};
 use crate::numbers::basenum::Number;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
-use std::{cmp::Ordering, marker::PhantomData};
+use std::marker::PhantomData;
 
 /// Distribution used in the Naive Bayes classifier.
 pub(crate) trait NBDistribution<X: Number, Y: Number>: Clone {
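The dropped `cmp::Ordering` import above is a side effect of the NaN handling in `predict`: once NaN scores are filtered out before taking the per-row argmax, no `partial_cmp`-based comparison is left to need it. Below is a minimal sketch of a NaN-tolerant argmax in that spirit; it is illustrative, not the exact smartcore code, and returning `None` corresponds to the all-NaN error path exercised by the tests that follow:

```rust
/// Pick the index of the largest non-NaN score.
/// Returns None when every score is NaN (the error case).
fn argmax_skip_nan(scores: &[f64]) -> Option<usize> {
    scores
        .iter()
        .enumerate()
        .filter(|(_, s)| !s.is_nan())
        // f64::max-style selection without partial_cmp/Ordering:
        .fold(None, |best: Option<(usize, f64)>, (i, &s)| match best {
            Some((_, b)) if b >= s => best,
            _ => Some((i, s)),
        })
        .map(|(i, _)| i)
}

fn main() {
    // Mirrors test_predict_with_nans and test_predict_all_nans below.
    assert_eq!(argmax_skip_nan(&[f64::NAN, 0.5]), Some(1));
    assert_eq!(argmax_skip_nan(&[f64::NAN, f64::NAN]), None);
}
```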
From e44f8b18d7bae86d4884c01f2196ba2b4299f536 Mon Sep 17 00:00:00 2001
From: Lorenzo Mec-iS
Date: Fri, 24 Jan 2025 09:15:44 +0000
Subject: [PATCH 06/10] Add tests

---
 src/naive_bayes/mod.rs | 119 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs
index d7d02e05..4e5ef029 100644
--- a/src/naive_bayes/mod.rs
+++ b/src/naive_bayes/mod.rs
@@ -193,4 +193,123 @@ mod tests {
             Err(_) => panic!("Should success in normal case without NaNs"),
         }
     }
+
+    // A simple test distribution using float
+    #[derive(Debug, PartialEq, Clone)]
+    struct TestDistributionAgain {
+        classes: Vec<u32>,
+        probs: Vec<f64>,
+    }
+
+    impl NBDistribution<f64, u32> for TestDistributionAgain {
+        fn classes(&self) -> &Vec<u32> {
+            &self.classes
+        }
+        fn prior(&self, class_index: usize) -> f64 {
+            self.probs[class_index]
+        }
+        fn log_likelihood<'a>(
+            &'a self,
+            class_index: usize,
+            _j: &'a Box<dyn ArrayView1<f64> + 'a>,
+        ) -> f64 {
+            self.probs[class_index].ln()
+        }
+    }
+
+    type TestNB = BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, TestDistributionAgain>;
+
+    #[test]
+    fn test_predict_empty_classes() {
+        let dist = TestDistributionAgain {
+            classes: vec![],
+            probs: vec![],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
+        assert!(nb.predict(&x).is_err());
+    }
+
+    #[test]
+    fn test_predict_single_class() {
+        let dist = TestDistributionAgain {
+            classes: vec![1],
+            probs: vec![1.0],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
+        let result = nb.predict(&x).unwrap();
+        assert_eq!(result, vec![1, 1]);
+    }
+
+    #[test]
+    fn test_predict_multiple_classes() {
+        let dist = TestDistributionAgain {
+            classes: vec![1, 2, 3],
+            probs: vec![0.2, 0.5, 0.3],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0], &[5.0, 6.0]]).unwrap();
+        let result = nb.predict(&x).unwrap();
+        assert_eq!(result, vec![2, 2, 2]);
+    }
+
+    #[test]
+    fn test_predict_with_nans() {
+        let dist = TestDistributionAgain {
+            classes: vec![1, 2],
+            probs: vec![f64::NAN, 0.5],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
+        let result = nb.predict(&x).unwrap();
+        assert_eq!(result, vec![2, 2]);
+    }
+
+    #[test]
+    fn test_predict_all_nans() {
+        let dist = TestDistributionAgain {
+            classes: vec![1, 2],
+            probs: vec![f64::NAN, f64::NAN],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
+        assert!(nb.predict(&x).is_err());
+    }
+
+    #[test]
+    fn test_predict_extreme_probabilities() {
+        let dist = TestDistributionAgain {
+            classes: vec![1, 2],
+            probs: vec![1e-300, 1e-301],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
+        let result = nb.predict(&x).unwrap();
+        assert_eq!(result, vec![1, 1]);
+    }
+
+    #[test]
+    fn test_predict_with_infinity() {
+        let dist = TestDistributionAgain {
+            classes: vec![1, 2, 3],
+            probs: vec![f64::INFINITY, 1.0, 2.0],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
+        let result = nb.predict(&x).unwrap();
+        assert_eq!(result, vec![1, 1]);
+    }
+
+    #[test]
+    fn test_predict_with_negative_infinity() {
+        let dist = TestDistributionAgain {
+            classes: vec![1, 2, 3],
+            probs: vec![f64::NEG_INFINITY, 1.0, 2.0],
+        };
+        let nb = TestNB::fit(dist).unwrap();
+        let x = DenseMatrix::from_2d_array(&[&[1.0, 2.0], &[3.0, 4.0]]).unwrap();
+        let result = nb.predict(&x).unwrap();
+        assert_eq!(result, vec![3, 3]);
+    }
 }
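The assertions above lean on the fact that `BaseNaiveBayes::predict` ranks each class by `log_likelihood + prior().ln()`; for `TestDistributionAgain` both terms reduce to `probs[c].ln()`, so the score is `2 * probs[c].ln()`, monotone in `probs[c]`. That is why the highest probability always wins, `f64::INFINITY` always wins, and `f64::NEG_INFINITY` never does. A small worked check of that ranking (standalone, not using the library types):

```rust
// Score(c) = log_likelihood(c) + ln(prior(c)), which for the test
// distribution collapses to 2 * ln(probs[c]). Argmax by score is
// therefore argmax by raw probability.
fn main() {
    let probs = [0.2, 0.5, 0.3]; // classes [1, 2, 3] in the test
    let scores: Vec<f64> = probs.iter().map(|p| p.ln() + p.ln()).collect();
    // Class 2 (index 1) has the largest score, matching vec![2, 2, 2].
    assert!(scores[1] > scores[0] && scores[1] > scores[2]);
}
```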
From 01a29b1030e59cc0f872b6ae38f432af588de488 Mon Sep 17 00:00:00 2001
From: Lorenzo Mec-iS
Date: Fri, 24 Jan 2025 11:30:38 +0000
Subject: [PATCH 07/10] Add test with static values

---
 src/naive_bayes/mod.rs | 174 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 173 insertions(+), 1 deletion(-)

diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs
index 4e5ef029..eaa9da1d 100644
--- a/src/naive_bayes/mod.rs
+++ b/src/naive_bayes/mod.rs
@@ -312,4 +312,176 @@ mod tests {
         let result = nb.predict(&x).unwrap();
         assert_eq!(result, vec![3, 3]);
     }
-}
+
+
+    #[test]
+    fn test_gaussian_naive_bayes_numerical_stability() {
+        #[derive(Debug, PartialEq, Clone)]
+        struct GaussianTestDistribution {
+            classes: Vec<u32>,
+            means: Vec<Vec<f64>>,
+            variances: Vec<Vec<f64>>,
+            priors: Vec<f64>,
+        }
+
+        impl NBDistribution<f64, u32> for GaussianTestDistribution {
+            fn classes(&self) -> &Vec<u32> { &self.classes }
+
+            fn prior(&self, class_index: usize) -> f64 {
+                self.priors[class_index]
+            }
+
+            fn log_likelihood<'a>(&'a self, class_index: usize, j: &'a Box<dyn ArrayView1<f64> + 'a>) -> f64 {
+                let means = &self.means[class_index];
+                let variances = &self.variances[class_index];
+                j.iterator(0)
+                    .enumerate()
+                    .map(|(i, &xi)| {
+                        let mean = means[i];
+                        let var = variances[i] + 1e-9; // Small smoothing for numerical stability
+                        let coeff = -0.5 * (2.0 * std::f64::consts::PI * var).ln();
+                        let exponent = -(xi - mean).powi(2) / (2.0 * var);
+                        coeff + exponent
+                    })
+                    .sum()
+            }
+        }
+
+        fn train_distribution(x: &DenseMatrix<f64>, y: &[u32]) -> GaussianTestDistribution {
+            let mut classes: Vec<u32> = y.iter().cloned().collect::<std::collections::HashSet<u32>>().into_iter().collect();
+            classes.sort();
+            let n_classes = classes.len();
+            let n_features = x.shape().1;
+
+            let mut means = vec![vec![0.0; n_features]; n_classes];
+            let mut variances = vec![vec![0.0; n_features]; n_classes];
+            let mut class_counts = vec![0; n_classes];
+
+            // Calculate means and count samples per class
+            for (sample, &class) in x.row_iter().zip(y.iter()) {
+                let class_idx = classes.iter().position(|&c| c == class).unwrap();
+                class_counts[class_idx] += 1;
+                for (i, &value) in sample.iterator(0).enumerate() {
+                    means[class_idx][i] += value;
+                }
+            }
+
+            // Normalize means
+            for (class_idx, mean) in means.iter_mut().enumerate() {
+                for value in mean.iter_mut() {
+                    *value /= class_counts[class_idx] as f64;
+                }
+            }
+
+            // Calculate variances
+            for (sample, &class) in x.row_iter().zip(y.iter()) {
+                let class_idx = classes.iter().position(|&c| c == class).unwrap();
+                for (i, &value) in sample.iterator(0).enumerate() {
+                    let diff = value - means[class_idx][i];
+                    variances[class_idx][i] += diff * diff;
+                }
+            }
+
+            // Normalize variances and add small epsilon to avoid zero variance
+            let epsilon = 1e-9;
+            for (class_idx, variance) in variances.iter_mut().enumerate() {
+                for value in variance.iter_mut() {
+                    *value = *value / class_counts[class_idx] as f64 + epsilon;
+                }
+            }
+
+            // Calculate priors
+            let total_samples = y.len() as f64;
+            let priors: Vec<f64> = class_counts.iter().map(|&count| count as f64 / total_samples).collect();
+
+            GaussianTestDistribution {
+                classes,
+                means,
+                variances,
+                priors,
+            }
+        }
+
+    type TestNBGaussian = BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, GaussianTestDistribution>;
+
+    // Create a constant training dataset
+    let n_samples = 1000;
+    let n_features = 5;
+    let n_classes = 4;
+
+    let mut x_data = Vec::with_capacity(n_samples * n_features);
+    let mut y_data = Vec::with_capacity(n_samples);
+
+    for i in 0..n_samples {
+        for j in 0..n_features {
+            x_data.push((i * j) as f64 % 10.0);
+        }
+        y_data.push((i % n_classes) as u32);
+    }
+
+    let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();
+    let y = y_data;
+
+    // Train the model
+    let dist = train_distribution(&x, &y);
+    let nb = TestNBGaussian::fit(dist).unwrap();
+
+    // Create constant test data
+    let n_test_samples = 100;
+    let mut test_x_data = Vec::with_capacity(n_test_samples * n_features);
+    for i in 0..n_test_samples {
+        for j in 0..n_features {
+            test_x_data.push((i * j * 2) as f64 % 15.0);
+        }
+    }
+    let test_x = DenseMatrix::new(n_test_samples, n_features, test_x_data, true).unwrap();
+
+    // Make predictions
+    let predictions = nb.predict(&test_x).map_err(|e| format!("Prediction failed: {}", e)).unwrap();
+
+    // Check numerical stability
+    assert_eq!(predictions.len(), n_test_samples, "Number of predictions should match number of test samples");
+
+    // Check that all predictions are valid class labels
+    for &pred in predictions.iter() {
+        assert!(pred < n_classes as u32, "Predicted class should be valid");
+    }
+
+    // Check consistency of predictions
+    let repeated_predictions = nb.predict(&test_x).map_err(|e| format!("Repeated prediction failed: {}", e)).unwrap();
+    assert_eq!(predictions, repeated_predictions, "Predictions should be consistent when repeated");
+
+    // Check extreme values
+    let extreme_x = DenseMatrix::new(2, n_features, vec![f64::MAX; n_features * 2], true).unwrap();
+    let extreme_predictions = nb.predict(&extreme_x);
+    assert!(extreme_predictions.is_err(), "Extreme value input should result in an error");
+    assert_eq!(
+        extreme_predictions.unwrap_err().to_string(),
+        "Predict failed: Failed to predict, all probabilities were NaN",
+        "Incorrect error message for extreme values"
+    );
+
+    // Check for NaN handling
+    let nan_x = DenseMatrix::new(2, n_features, vec![f64::NAN; n_features * 2], true).unwrap();
+    let nan_predictions = nb.predict(&nan_x);
+    assert!(nan_predictions.is_err(), "NaN input should result in an error");
+
+    // Check for very small values
+    let small_x = DenseMatrix::new(2, n_features, vec![f64::MIN_POSITIVE; n_features * 2], true).unwrap();
+    let small_predictions = nb.predict(&small_x).map_err(|e| format!("Small value prediction failed: {}", e)).unwrap();
+    for &pred in small_predictions.iter() {
+        assert!(pred < n_classes as u32, "Predictions for very small values should be valid");
+    }
+
+    // Check for values close to zero
+    let near_zero_x = DenseMatrix::new(2, n_features, vec![1e-300; n_features * 2], true).unwrap();
+    let near_zero_predictions = nb.predict(&near_zero_x).map_err(|e| format!("Near-zero value prediction failed: {}", e)).unwrap();
+    for &pred in near_zero_predictions.iter() {
+        assert!(pred < n_classes as u32, "Predictions for near-zero values should be valid");
+    }
+
+    println!("All numerical stability checks passed!");
+    }
+
+
+}
\ No newline at end of file
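The per-feature term in `log_likelihood` above is the log-density of a univariate normal, ln N(x; mean, var) = -0.5 * ln(2*pi*var) - (x - mean)^2 / (2*var), with 1e-9 added to the variance for numerical stability. A standalone sanity check of that formula, using the same arithmetic as the test distribution:

```rust
// ln N(x; mean, var) = -0.5 * ln(2*pi*var) - (x - mean)^2 / (2*var)
fn log_normal_pdf(x: f64, mean: f64, var: f64) -> f64 {
    let var = var + 1e-9; // same smoothing as in the test distribution
    -0.5 * (2.0 * std::f64::consts::PI * var).ln() - (x - mean).powi(2) / (2.0 * var)
}

fn main() {
    // At x == mean the quadratic term vanishes, leaving -0.5 * ln(2*pi*var).
    let at_mean = log_normal_pdf(3.0, 3.0, 1.0);
    let expected = -0.5 * (2.0 * std::f64::consts::PI * (1.0 + 1e-9)).ln();
    assert!((at_mean - expected).abs() < 1e-12);
    // Moving away from the mean can only lower the log-density.
    assert!(log_normal_pdf(4.0, 3.0, 1.0) < at_mean);
}
```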
valid"); + } + + // Check for values close to zero + let near_zero_x = DenseMatrix::new(2, n_features, vec![1e-300; n_features * 2], true).unwrap(); + let near_zero_predictions = nb.predict(&near_zero_x).map_err(|e| format!("Near-zero value prediction failed: {}", e)).unwrap(); + for &pred in near_zero_predictions.iter() { + assert!(pred < n_classes as u32, "Predictions for near-zero values should be valid"); + } + + println!("All numerical stability checks passed!"); + } + + +} \ No newline at end of file From 6bfddffde79438ea32d5b107a743f66ab688d508 Mon Sep 17 00:00:00 2001 From: Lorenzo Mec-iS Date: Fri, 24 Jan 2025 11:31:11 +0000 Subject: [PATCH 08/10] Add formatting --- src/naive_bayes/mod.rs | 222 +++++++++++++++++++++++++---------------- 1 file changed, 134 insertions(+), 88 deletions(-) diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index eaa9da1d..142b944a 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -313,7 +313,6 @@ mod tests { assert_eq!(result, vec![3, 3]); } - #[test] fn test_gaussian_naive_bayes_numerical_stability() { #[derive(Debug, PartialEq, Clone)] @@ -323,15 +322,21 @@ mod tests { variances: Vec>, priors: Vec, } - + impl NBDistribution for GaussianTestDistribution { - fn classes(&self) -> &Vec { &self.classes } - + fn classes(&self) -> &Vec { + &self.classes + } + fn prior(&self, class_index: usize) -> f64 { self.priors[class_index] } - - fn log_likelihood<'a>(&'a self, class_index: usize, j: &'a Box + 'a>) -> f64 { + + fn log_likelihood<'a>( + &'a self, + class_index: usize, + j: &'a Box + 'a>, + ) -> f64 { let means = &self.means[class_index]; let variances = &self.variances[class_index]; j.iterator(0) @@ -346,17 +351,22 @@ mod tests { .sum() } } - + fn train_distribution(x: &DenseMatrix, y: &[u32]) -> GaussianTestDistribution { - let mut classes: Vec = y.iter().cloned().collect::>().into_iter().collect(); + let mut classes: Vec = y + .iter() + .cloned() + .collect::>() + .into_iter() + .collect(); classes.sort(); let n_classes = classes.len(); let n_features = x.shape().1; - + let mut means = vec![vec![0.0; n_features]; n_classes]; let mut variances = vec![vec![0.0; n_features]; n_classes]; let mut class_counts = vec![0; n_classes]; - + // Calculate means and count samples per class for (sample, &class) in x.row_iter().zip(y.iter()) { let class_idx = classes.iter().position(|&c| c == class).unwrap(); @@ -365,14 +375,14 @@ mod tests { means[class_idx][i] += value; } } - + // Normalize means for (class_idx, mean) in means.iter_mut().enumerate() { for value in mean.iter_mut() { *value /= class_counts[class_idx] as f64; } } - + // Calculate variances for (sample, &class) in x.row_iter().zip(y.iter()) { let class_idx = classes.iter().position(|&c| c == class).unwrap(); @@ -381,7 +391,7 @@ mod tests { variances[class_idx][i] += diff * diff; } } - + // Normalize variances and add small epsilon to avoid zero variance let epsilon = 1e-9; for (class_idx, variance) in variances.iter_mut().enumerate() { @@ -389,11 +399,14 @@ mod tests { *value = *value / class_counts[class_idx] as f64 + epsilon; } } - + // Calculate priors let total_samples = y.len() as f64; - let priors: Vec = class_counts.iter().map(|&count| count as f64 / total_samples).collect(); - + let priors: Vec = class_counts + .iter() + .map(|&count| count as f64 / total_samples) + .collect(); + GaussianTestDistribution { classes, means, @@ -401,87 +414,120 @@ mod tests { priors, } } - - type TestNBGaussian = BaseNaiveBayes, Vec, GaussianTestDistribution>; - - // Create a constant 
+
+        type TestNBGaussian =
+            BaseNaiveBayes<f64, u32, DenseMatrix<f64>, Vec<u32>, GaussianTestDistribution>;
+
+        // Create a constant training dataset
+        let n_samples = 1000;
+        let n_features = 5;
+        let n_classes = 4;
+
+        let mut x_data = Vec::with_capacity(n_samples * n_features);
+        let mut y_data = Vec::with_capacity(n_samples);
+
+        for i in 0..n_samples {
+            for j in 0..n_features {
+                x_data.push((i * j) as f64 % 10.0);
+            }
+            y_data.push((i % n_classes) as u32);
+        }
+
+        let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();
+        let y = y_data;
+
+        // Train the model
+        let dist = train_distribution(&x, &y);
+        let nb = TestNBGaussian::fit(dist).unwrap();
+
+        // Create constant test data
+        let n_test_samples = 100;
+        let mut test_x_data = Vec::with_capacity(n_test_samples * n_features);
+        for i in 0..n_test_samples {
+            for j in 0..n_features {
+                test_x_data.push((i * j * 2) as f64 % 15.0);
+            }
+        }
+        let test_x = DenseMatrix::new(n_test_samples, n_features, test_x_data, true).unwrap();
+
+        // Make predictions
+        let predictions = nb
+            .predict(&test_x)
+            .map_err(|e| format!("Prediction failed: {}", e))
+            .unwrap();
+
+        // Check numerical stability
+        assert_eq!(
+            predictions.len(),
+            n_test_samples,
+            "Number of predictions should match number of test samples"
+        );
+
+        // Check that all predictions are valid class labels
+        for &pred in predictions.iter() {
+            assert!(pred < n_classes as u32, "Predicted class should be valid");
+        }
+
+        // Check consistency of predictions
+        let repeated_predictions = nb
+            .predict(&test_x)
+            .map_err(|e| format!("Repeated prediction failed: {}", e))
+            .unwrap();
+        assert_eq!(
+            predictions, repeated_predictions,
+            "Predictions should be consistent when repeated"
+        );
+
+        // Check extreme values
+        let extreme_x =
+            DenseMatrix::new(2, n_features, vec![f64::MAX; n_features * 2], true).unwrap();
+        let extreme_predictions = nb.predict(&extreme_x);
+        assert!(
+            extreme_predictions.is_err(),
+            "Extreme value input should result in an error"
+        );
+        assert_eq!(
+            extreme_predictions.unwrap_err().to_string(),
+            "Predict failed: Failed to predict, all probabilities were NaN",
+            "Incorrect error message for extreme values"
+        );
+
+        // Check for NaN handling
+        let nan_x = DenseMatrix::new(2, n_features, vec![f64::NAN; n_features * 2], true).unwrap();
+        let nan_predictions = nb.predict(&nan_x);
+        assert!(
+            nan_predictions.is_err(),
+            "NaN input should result in an error"
+        );
+
+        // Check for very small values
+        let small_x =
+            DenseMatrix::new(2, n_features, vec![f64::MIN_POSITIVE; n_features * 2], true).unwrap();
+        let small_predictions = nb
+            .predict(&small_x)
+            .map_err(|e| format!("Small value prediction failed: {}", e))
+            .unwrap();
+        for &pred in small_predictions.iter() {
+            assert!(
+                pred < n_classes as u32,
+                "Predictions for very small values should be valid"
+            );
+        }
+
+        // Check for values close to zero
+        let near_zero_x =
+            DenseMatrix::new(2, n_features, vec![1e-300; n_features * 2], true).unwrap();
+        let near_zero_predictions = nb
+            .predict(&near_zero_x)
+            .map_err(|e| format!("Near-zero value prediction failed: {}", e))
+            .unwrap();
+        for &pred in near_zero_predictions.iter() {
+            assert!(
+                pred < n_classes as u32,
+                "Predictions for near-zero values should be valid"
+            );
+        }
+
+        println!("All numerical stability checks passed!");
     }
-
-
-}
\ No newline at end of file
+}
From b8104806e2661f11b17051b7ccb7daac2c9e9e2c Mon Sep 17 00:00:00 2001
From: Lorenzo Mec-iS
Date: Fri, 24 Jan 2025 12:21:48 +0000
Subject: [PATCH 09/10] Add test for numerical stability with numpy

---
 src/naive_bayes/mod.rs | 100 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs
index 142b944a..9f05a69e 100644
--- a/src/naive_bayes/mod.rs
+++ b/src/naive_bayes/mod.rs
@@ -530,4 +530,104 @@ mod tests {
         println!("All numerical stability checks passed!");
     }
+
+    #[test]
+    fn test_gaussian_naive_bayes_numerical_stability_random_data() {
+        #[derive(Debug)]
+        struct MySimpleRng {
+            state: u64,
+        }
+
+        impl MySimpleRng {
+            fn new(seed: u64) -> Self {
+                MySimpleRng { state: seed }
+            }
+
+            /// Get the next u64 in the sequence.
+            fn next_u64(&mut self) -> u64 {
+                // LCG parameters; these are somewhat arbitrary but commonly used.
+                // Feel free to tweak the multiplier/adder etc.
+                self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
+                self.state
+            }
+
+            /// Get an f64 in the range [min, max).
+            fn next_f64(&mut self, min: f64, max: f64) -> f64 {
+                let fraction = (self.next_u64() as f64) / (u64::MAX as f64);
+                min + fraction * (max - min)
+            }
+
+            /// Get a usize in the range [min, max). This floors the floating result.
+            fn gen_range_usize(&mut self, min: usize, max: usize) -> usize {
+                let v = self.next_f64(min as f64, max as f64);
+                // Truncate into the integer range. Because of floating inexactness,
+                // ensure we also clamp.
+                let int_v = v.floor() as isize;
+                // simple clamp to avoid any float rounding out of range
+                let clamped = int_v.max(min as isize).min((max - 1) as isize);
+                clamped as usize
+            }
+        }
+        use crate::naive_bayes::gaussian::GaussianNB;
+        // We will generate random data in a reproducible way (using a fixed seed).
+        let mut rng = MySimpleRng::new(42);
+
+        let n_samples = 1000;
+        let n_features = 5;
+        let n_classes = 4;
+
+        // Our feature matrix and label vector
+        let mut x_data = Vec::with_capacity(n_samples * n_features);
+        let mut y_data = Vec::with_capacity(n_samples);
+
+        // Fill x_data with random values and y_data with random class labels.
+        for _i in 0..n_samples {
+            for _j in 0..n_features {
+                // We’ll pick random values in [-10, 10).
+                x_data.push(rng.next_f64(-10.0, 10.0));
+            }
+            let class = rng.gen_range_usize(0, n_classes) as u32;
+            y_data.push(class);
+        }
+
+        // Create DenseMatrix from x_data
+        let x = DenseMatrix::new(n_samples, n_features, x_data, true).unwrap();
+
+        // Train GaussianNB
+        let gnb = GaussianNB::fit(&x, &y_data, Default::default())
+            .expect("Fitting GaussianNB with random data failed.");
+
+        // Predict on the same training data to verify no numerical instability
+        let predictions = gnb.predict(&x).expect("Prediction on random data failed.");
+
+        // Basic sanity checks
+        assert_eq!(
+            predictions.len(),
+            n_samples,
+            "Prediction size must match n_samples"
+        );
+        for &pred_class in &predictions {
+            assert!(
+                (pred_class as usize) < n_classes,
+                "Predicted class {} is out of range [0..n_classes).",
+                pred_class
+            );
+        }
+
+        // If you want to compare with scikit-learn, you can do something like:
+        // println!("X = {:?}", &x);
+        // println!("Y = {:?}", &y_data);
+        // println!("predictions = {:?}", &predictions);
+        // and then in Python:
+        // import numpy as np
+        // from sklearn.naive_bayes import GaussianNB
+        // X = np.reshape(np.array(x), (1000, 5), order='F')
+        // Y = np.array(y)
+        // gnb = GaussianNB().fit(X, Y)
+        // preds = gnb.predict(X[[5:50], :])
+        // expected = np.array(predictions[5:50])
+        // assert expected == preds
+        // They should match closely (or exactly) depending on floating rounding.
+    }
 }
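`MySimpleRng` above is a plain 64-bit linear congruential generator; the multiplier 6364136223846793005 is the widely used one from Knuth's MMIX (also adopted by the PCG family), so for a fixed seed the sequence, and therefore the generated dataset, is fully reproducible without pulling in the `rand` crate. A short determinism check; the struct is restated here so the snippet is self-contained:

```rust
// Same seed must replay the same sequence, which is what keeps the
// random-data test reproducible across runs and platforms.
struct MySimpleRng {
    state: u64,
}

impl MySimpleRng {
    fn new(seed: u64) -> Self {
        MySimpleRng { state: seed }
    }
    fn next_u64(&mut self) -> u64 {
        // LCG step identical to the helper in the test above.
        self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1);
        self.state
    }
}

fn main() {
    let (mut a, mut b) = (MySimpleRng::new(42), MySimpleRng::new(42));
    for _ in 0..1000 {
        assert_eq!(a.next_u64(), b.next_u64());
    }
}
```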
From 6f4850ae89dda4a62b236c6a18ee580506867d38 Mon Sep 17 00:00:00 2001
From: Lorenzo Mec-iS
Date: Fri, 24 Jan 2025 12:24:30 +0000
Subject: [PATCH 10/10] cleanup

---
 src/naive_bayes/mod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs
index 9f05a69e..4a949d7f 100644
--- a/src/naive_bayes/mod.rs
+++ b/src/naive_bayes/mod.rs
@@ -625,8 +625,8 @@ mod tests {
         // X = np.reshape(np.array(x), (1000, 5), order='F')
         // Y = np.array(y)
         // gnb = GaussianNB().fit(X, Y)
-        // preds = gnb.predict(X[[5:50], :])
-        // expected = np.array(predictions[5:50])
+        // preds = gnb.predict(X)
+        // expected = np.array(predictions)
         // assert expected == preds
         // They should match closely (or exactly) depending on floating rounding.
 }