diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3a53064e..f69b5fcc 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -1,38 +1,38 @@ -name: Coverage - -on: - pull_request: - push: - branches: - - master - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - test: - name: coverage - runs-on: ubuntu-latest - services: - qdrant: - image: qdrant/qdrant:v1.9.7 - ports: - - 6334:6334 - env: - RUST_LOG: DEBUG - RUST_BACKTRACE: 1 - QDRANT_URL: http://qdrant:6334 - container: - image: xd009642/tarpaulin:develop-nightly - options: --security-opt seccomp=unconfined - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Generate code coverage - run: | - cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out xml - - - name: Coveralls - uses: coverallsapp/github-action@v2 +# name: Coverage +# +# on: +# pull_request: +# push: +# branches: +# - master +# +# concurrency: +# group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} +# cancel-in-progress: true +# +# jobs: +# test: +# name: coverage +# runs-on: ubuntu-latest +# services: +# qdrant: +# image: qdrant/qdrant:v1.9.7 +# ports: +# - 6334:6334 +# env: +# RUST_LOG: swiftide=debug +# RUST_BACKTRACE: 1 +# QDRANT_URL: http://qdrant:6334 +# container: +# image: xd009642/tarpaulin:develop-nightly +# options: --security-opt seccomp=unconfined +# steps: +# - name: Checkout repository +# uses: actions/checkout@v4 +# +# - name: Generate code coverage +# run: | +# cargo tarpaulin --verbose --all-features -p swiftide --timeout 120 --out xml +# +# - name: Coveralls +# uses: coverallsapp/github-action@v2 diff --git a/Cargo.lock b/Cargo.lock index 80cd7b45..53d3951e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3155,7 +3155,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.67", @@ -3199,17 +3199,19 @@ dependencies = [ [[package]] name = "qdrant-client" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f99a77fdc9b1ef9ce9ab9edcdd3e56e6977475faf633598f1a37066c87be4b7" +checksum = "eb7527de5e2ea7a239d3bc1da8b2d6db5652322daa1cbf4b3a55ad670f3891ed" dependencies = [ "anyhow", + "derive_builder", "futures-util", "prost", "prost-types", "reqwest", "serde", "serde_json", + "thiserror", "tonic", ] diff --git a/README.md b/README.md index ac405fb9..4b7c629a 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,11 @@ *** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use. *** https://www.markdownguide.org/basic-syntax/#reference-style-links --> + ![CI](https://img.shields.io/github/actions/workflow/status/bosun-ai/swiftide/test.yml?style=flat-square) -![Coverage Status](https://img.shields.io/coverallsCoverage/github/bosun-ai/swiftide?style=flat-square) + + + [![Crate Badge]][Crate] [![Docs Badge]][API Docs] [![Contributors][contributors-shield]][contributors-url] diff --git a/examples/ingest_codebase.rs b/examples/ingest_codebase.rs index 0cf476c2..ef476921 100644 --- a/examples/ingest_codebase.rs +++ b/examples/ingest_codebase.rs @@ -40,11 +40,6 @@ async fn main() -> Result<(), Box> { .unwrap_or("redis://localhost:6379") .to_owned(); - let qdrant_url = std::env::var("QDRANT_URL") - .as_deref() - .unwrap_or("http://localhost:6334") - .to_owned(); - ingestion::IngestionPipeline::from_loader(FileLoader::new(".").with_extensions(&["rs"])) .filter_cached(Redis::try_from_url(redis_url, "swiftide-examples")?) .then(MetadataQACode::new(openai_client.clone())) @@ -54,10 +49,10 @@ async fn main() -> Result<(), Box> { )?) 
.then_in_batch(10, Embed::new(openai_client.clone())) .then_store_with( - Qdrant::try_from_url(qdrant_url)? + Qdrant::builder() .batch_size(50) .vector_size(1536) - .collection_name("swiftide-examples".to_string()) + .collection_name("swiftide-examples") .build()?, ) .run() diff --git a/examples/ingest_markdown_lots_of_metadata.rs b/examples/ingest_markdown_lots_of_metadata.rs index 18b9f77b..a7c41c0e 100644 --- a/examples/ingest_markdown_lots_of_metadata.rs +++ b/examples/ingest_markdown_lots_of_metadata.rs @@ -33,11 +33,6 @@ async fn main() -> Result<(), Box> { .default_prompt_model("gpt-4o") .build()?; - let qdrant_url = std::env::var("QDRANT_URL") - .as_deref() - .unwrap_or("http://localhost:6334") - .to_owned(); - ingestion::IngestionPipeline::from_loader( FileLoader::new("README.md").with_extensions(&["md"]), ) @@ -51,10 +46,10 @@ async fn main() -> Result<(), Box> { .log_all() .filter_errors() .then_store_with( - Qdrant::try_from_url(qdrant_url)? + Qdrant::builder() .batch_size(50) .vector_size(1536) - .collection_name("swiftide-examples".to_string()) + .collection_name("swiftide-examples") .build()?, ) .run() diff --git a/swiftide/Cargo.toml b/swiftide/Cargo.toml index 9d222fbc..831da36a 100644 --- a/swiftide/Cargo.toml +++ b/swiftide/Cargo.toml @@ -32,7 +32,7 @@ pin-project-lite = "0.2" # Integrations async-openai = { version = "0.23.2", optional = true } -qdrant-client = { version = "1.9.0", optional = true } +qdrant-client = { version = "1.10.1", optional = true } redis = { version = "0.25.4", features = [ "aio", "tokio-comp", diff --git a/swiftide/src/integrations/qdrant/mod.rs b/swiftide/src/integrations/qdrant/mod.rs index eecaee6e..dcfa139c 100644 --- a/swiftide/src/integrations/qdrant/mod.rs +++ b/swiftide/src/integrations/qdrant/mod.rs @@ -5,25 +5,34 @@ mod ingestion_node; mod persist; -use anyhow::Result; +use std::sync::Arc; + +use anyhow::{Context as _, Result}; use derive_builder::Builder; -use qdrant_client::client::QdrantClient; -use 
qdrant_client::prelude::*; -use qdrant_client::qdrant::vectors_config::Config; -use qdrant_client::qdrant::{VectorParams, VectorsConfig}; +use qdrant_client::qdrant::{CreateCollectionBuilder, Distance, VectorParamsBuilder}; const DEFAULT_COLLECTION_NAME: &str = "swiftide"; +const DEFAULT_QDRANT_URL: &str = "http://localhost:6334"; /// A struct representing a Qdrant client with configuration options. /// /// This struct is used to interact with the Qdrant vector database, providing methods to create and manage /// vector collections, store data, and ensure proper indexing for efficient searches. -#[derive(Builder)] -#[builder(pattern = "owned", setter(strip_option))] +/// +/// Can be cloned at relatively low cost as the client is shared. +#[derive(Builder, Clone)] +#[builder( + pattern = "owned", + setter(strip_option), + build_fn(error = "anyhow::Error") +)] pub struct Qdrant { /// The Qdrant client used to interact with the Qdrant vector database. - #[builder(setter(into))] - client: QdrantClient, + /// + /// By default the client will be built from QDRANT_URL and the optional QDRANT_API_KEY. + /// It will fall back to `http://localhost:6334` if QDRANT_URL is not set. + #[builder(setter(into), default = "self.default_client()?")] + client: Arc<qdrant_client::Qdrant>, /// The name of the collection to be used in Qdrant. Defaults to "swiftide". #[builder(default = "DEFAULT_COLLECTION_NAME.to_string()")] #[builder(setter(into))] @@ -41,7 +50,9 @@ impl Qdrant { QdrantBuilder::default() } - /// Tries to create a `QdrantBuilder` from a given URL. + /// Tries to create a `QdrantBuilder` from a given URL. Will use the API key in QDRANT_API_KEY if present. + /// + /// Returns /// /// # Arguments /// @@ -51,7 +62,11 @@ impl Qdrant { /// /// A `Result` containing the `QdrantBuilder` if successful, or an error otherwise. 
pub fn try_from_url(url: impl AsRef<str>) -> Result<QdrantBuilder> { - Ok(QdrantBuilder::default().client(QdrantClient::from_url(url.as_ref()).build()?)) + Ok(QdrantBuilder::default().client( + qdrant_client::Qdrant::from_url(url.as_ref()) + .api_key(std::env::var("QDRANT_API_KEY")) + .build()?, + )) } /// Creates an index in the Qdrant collection if it does not already exist. @@ -71,22 +86,28 @@ impl Qdrant { tracing::warn!("Creating collection {}", self.collection_name); self.client - .create_collection(&CreateCollection { - collection_name: self.collection_name.to_string(), - vectors_config: Some(VectorsConfig { - config: Some(Config::Params(VectorParams { - size: self.vector_size, - distance: Distance::Cosine.into(), - ..Default::default() - })), - }), - ..Default::default() - }) + .create_collection( + CreateCollectionBuilder::new(self.collection_name.clone()) + .vectors_config(VectorParamsBuilder::new(self.vector_size, Distance::Cosine)), + ) .await?; Ok(()) } } +impl QdrantBuilder { + fn default_client(&self) -> Result<Arc<qdrant_client::Qdrant>> { + let client = qdrant_client::Qdrant::from_url( + &std::env::var("QDRANT_URL").unwrap_or(DEFAULT_QDRANT_URL.to_string()), + ) + .api_key(std::env::var("QDRANT_API_KEY")) + .build() + .context("Could not build default qdrant client")?; + + Ok(Arc::new(client)) + } +} + impl std::fmt::Debug for Qdrant { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Qdrant") diff --git a/swiftide/src/integrations/qdrant/persist.rs b/swiftide/src/integrations/qdrant/persist.rs index db9b8e8d..a04a085d 100644 --- a/swiftide/src/integrations/qdrant/persist.rs +++ b/swiftide/src/integrations/qdrant/persist.rs @@ -4,6 +4,7 @@ use anyhow::Result; use async_trait::async_trait; +use qdrant_client::qdrant::UpsertPointsBuilder; use crate::{ ingestion::{IngestionNode, IngestionStream}, @@ -54,8 +55,14 @@ impl Persist for Qdrant { #[tracing::instrument(skip_all, err, name = "storage.qdrant.store")] async fn store(&self, node: 
crate::ingestion::IngestionNode) -> Result { let point = node.clone().try_into()?; + + tracing::debug!(?node, ?point, "Storing node"); + self.client - .upsert_points_blocking(self.collection_name.to_string(), None, vec![point], None) + .upsert_points(UpsertPointsBuilder::new( + self.collection_name.to_string(), + vec![point], + )) .await?; Ok(node) } @@ -86,15 +93,20 @@ impl Persist for Qdrant { let points = points.unwrap(); + tracing::debug!("Storing batch of {} nodes", points.len()); + let result = self .client - .upsert_points_blocking(self.collection_name.to_string(), None, points, None) + .upsert_points(UpsertPointsBuilder::new( + self.collection_name.to_string(), + points, + )) .await; if result.is_ok() { IngestionStream::iter(nodes.into_iter().map(Ok)) } else { - vec![Err(result.unwrap_err())].into() + vec![Err(result.unwrap_err().into())].into() } } } diff --git a/swiftide/tests/ingestion_pipeline.rs b/swiftide/tests/ingestion_pipeline.rs index 754daa68..632fc400 100644 --- a/swiftide/tests/ingestion_pipeline.rs +++ b/swiftide/tests/ingestion_pipeline.rs @@ -2,6 +2,7 @@ //! The tests validate the functionality of the pipeline, ensuring it processes data correctly //! from a temporary file, simulates API responses, and stores data accurately in the Qdrant vector database. 
+use qdrant_client::qdrant::{SearchPointsBuilder, Value}; use serde_json::json; use swiftide::{ingestion::IngestionPipeline, loaders::FileLoader, *}; use temp_dir::TempDir; @@ -66,7 +67,7 @@ async fn test_ingestion_pipeline() { "data": [ { "object": "embedding", - "embedding": vec![0; 1536], + "embedding": vec![0; 1536], "index": 0 } ], @@ -174,20 +175,18 @@ async fn test_ingestion_pipeline() { result.expect("Ingestion pipeline failed"); - use qdrant_client::prelude::*; - let qdrant_client = QdrantClient::from_url(&qdrant_url).build().unwrap(); - let search_result = qdrant_client - .search_points(&SearchPoints { - collection_name: "swiftide-test".to_string(), - vector: vec![0_f32; 1536], - limit: 10, - with_payload: Some(true.into()), - ..Default::default() - }) - .await + let qdrant_client = qdrant_client::Qdrant::from_url(&qdrant_url) + .build() .unwrap(); - let first = search_result.result.first().unwrap(); + let search_request = + SearchPointsBuilder::new("swiftide-test", vec![0_f32; 1536], 10).with_payload(true); + + let search_response = qdrant_client.search_points(search_request).await.unwrap(); + + dbg!(&search_response); + + let first = search_response.result.first().unwrap(); assert!(first .payload