Skip to content

Commit

Permalink
fix(qdrant): upgrade and better defaults (#118)
Browse files Browse the repository at this point in the history
- **fix(deps): update rust crate qdrant-client to v1.10.1**
- **fix(qdrant): upgrade to new qdrant with sensible defaults**
- **feat(qdrant): safe to clone with internal arc**

---------

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
  • Loading branch information
timonv and renovate[bot] authored Jul 2, 2024
1 parent a8b02a3 commit 353cd9e
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 95 deletions.
76 changes: 38 additions & 38 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
name: Coverage

on:
pull_request:
push:
branches:
- master

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
test:
name: coverage
runs-on: ubuntu-latest
services:
qdrant:
image: qdrant/qdrant:v1.9.7
ports:
- 6334:6334
env:
RUST_LOG: DEBUG
RUST_BACKTRACE: 1
QDRANT_URL: http://qdrant:6334
container:
image: xd009642/tarpaulin:develop-nightly
options: --security-opt seccomp=unconfined
steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Generate code coverage
run: |
cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out xml
- name: Coveralls
uses: coverallsapp/github-action@v2
# name: Coverage
#
# on:
# pull_request:
# push:
# branches:
# - master
#
# concurrency:
# group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
# cancel-in-progress: true
#
# jobs:
# test:
# name: coverage
# runs-on: ubuntu-latest
# services:
# qdrant:
# image: qdrant/qdrant:v1.9.7
# ports:
# - 6334:6334
# env:
# RUST_LOG: swiftide=debug
# RUST_BACKTRACE: 1
# QDRANT_URL: http://qdrant:6334
# container:
# image: xd009642/tarpaulin:develop-nightly
# options: --security-opt seccomp=unconfined
# steps:
# - name: Checkout repository
# uses: actions/checkout@v4
#
# - name: Generate code coverage
# run: |
# cargo tarpaulin --verbose --all-features -p swiftide --timeout 120 --out xml
#
# - name: Coveralls
# uses: coverallsapp/github-action@v2
8 changes: 5 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@
*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
-->

![CI](https://img.shields.io/github/actions/workflow/status/bosun-ai/swiftide/test.yml?style=flat-square)
![Coverage Status](https://img.shields.io/coverallsCoverage/github/bosun-ai/swiftide?style=flat-square)

<!-- ![Coverage Status](https://img.shields.io/coverallsCoverage/github/bosun-ai/swiftide?style=flat-square) -->

[![Crate Badge]][Crate]
[![Docs Badge]][API Docs]
[![Contributors][contributors-shield]][contributors-url]
Expand Down
9 changes: 2 additions & 7 deletions examples/ingest_codebase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.unwrap_or("redis://localhost:6379")
.to_owned();

let qdrant_url = std::env::var("QDRANT_URL")
.as_deref()
.unwrap_or("http://localhost:6334")
.to_owned();

ingestion::IngestionPipeline::from_loader(FileLoader::new(".").with_extensions(&["rs"]))
.filter_cached(Redis::try_from_url(redis_url, "swiftide-examples")?)
.then(MetadataQACode::new(openai_client.clone()))
Expand All @@ -54,10 +49,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
)?)
.then_in_batch(10, Embed::new(openai_client.clone()))
.then_store_with(
Qdrant::try_from_url(qdrant_url)?
Qdrant::builder()
.batch_size(50)
.vector_size(1536)
.collection_name("swiftide-examples".to_string())
.collection_name("swiftide-examples")
.build()?,
)
.run()
Expand Down
9 changes: 2 additions & 7 deletions examples/ingest_markdown_lots_of_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.default_prompt_model("gpt-4o")
.build()?;

let qdrant_url = std::env::var("QDRANT_URL")
.as_deref()
.unwrap_or("http://localhost:6334")
.to_owned();

ingestion::IngestionPipeline::from_loader(
FileLoader::new("README.md").with_extensions(&["md"]),
)
Expand All @@ -51,10 +46,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.log_all()
.filter_errors()
.then_store_with(
Qdrant::try_from_url(qdrant_url)?
Qdrant::builder()
.batch_size(50)
.vector_size(1536)
.collection_name("swiftide-examples".to_string())
.collection_name("swiftide-examples")
.build()?,
)
.run()
Expand Down
2 changes: 1 addition & 1 deletion swiftide/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pin-project-lite = "0.2"

# Integrations
async-openai = { version = "0.23.2", optional = true }
qdrant-client = { version = "1.9.0", optional = true }
qdrant-client = { version = "1.10.1", optional = true }
redis = { version = "0.25.4", features = [
"aio",
"tokio-comp",
Expand Down
65 changes: 43 additions & 22 deletions swiftide/src/integrations/qdrant/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,34 @@
mod ingestion_node;
mod persist;

use anyhow::Result;
use std::sync::Arc;

use anyhow::{Context as _, Result};
use derive_builder::Builder;
use qdrant_client::client::QdrantClient;
use qdrant_client::prelude::*;
use qdrant_client::qdrant::vectors_config::Config;
use qdrant_client::qdrant::{VectorParams, VectorsConfig};
use qdrant_client::qdrant::{CreateCollectionBuilder, Distance, VectorParamsBuilder};

const DEFAULT_COLLECTION_NAME: &str = "swiftide";
const DEFAULT_QDRANT_URL: &str = "http://localhost:6334";

/// A struct representing a Qdrant client with configuration options.
///
/// This struct is used to interact with the Qdrant vector database, providing methods to create and manage
/// vector collections, store data, and ensure proper indexing for efficient searches.
#[derive(Builder)]
#[builder(pattern = "owned", setter(strip_option))]
///
/// Can be cloned at relatively low cost, as the client is shared.
#[derive(Builder, Clone)]
#[builder(
pattern = "owned",
setter(strip_option),
build_fn(error = "anyhow::Error")
)]
pub struct Qdrant {
/// The Qdrant client used to interact with the Qdrant vector database.
#[builder(setter(into))]
client: QdrantClient,
///
/// By default the client will be built from `QDRANT_URL` and, optionally, `QDRANT_API_KEY`.
/// It will fall back to `http://localhost:6334` if QDRANT_URL is not set.
#[builder(setter(into), default = "self.default_client()?")]
client: Arc<qdrant_client::Qdrant>,
/// The name of the collection to be used in Qdrant. Defaults to "swiftide".
#[builder(default = "DEFAULT_COLLECTION_NAME.to_string()")]
#[builder(setter(into))]
Expand All @@ -41,7 +50,9 @@ impl Qdrant {
QdrantBuilder::default()
}

/// Tries to create a `QdrantBuilder` from a given URL.
/// Tries to create a `QdrantBuilder` from a given URL. Will use the API key in `QDRANT_API_KEY` if present.
///
/// Returns an error if the client cannot be built from the given URL.
///
/// # Arguments
///
Expand All @@ -51,7 +62,11 @@ impl Qdrant {
///
/// A `Result` containing the `QdrantBuilder` if successful, or an error otherwise.
pub fn try_from_url(url: impl AsRef<str>) -> Result<QdrantBuilder> {
Ok(QdrantBuilder::default().client(QdrantClient::from_url(url.as_ref()).build()?))
Ok(QdrantBuilder::default().client(
qdrant_client::Qdrant::from_url(url.as_ref())
.api_key(std::env::var("QDRANT_API_KEY"))
.build()?,
))
}

/// Creates an index in the Qdrant collection if it does not already exist.
Expand All @@ -71,22 +86,28 @@ impl Qdrant {

tracing::warn!("Creating collection {}", self.collection_name);
self.client
.create_collection(&CreateCollection {
collection_name: self.collection_name.to_string(),
vectors_config: Some(VectorsConfig {
config: Some(Config::Params(VectorParams {
size: self.vector_size,
distance: Distance::Cosine.into(),
..Default::default()
})),
}),
..Default::default()
})
.create_collection(
CreateCollectionBuilder::new(self.collection_name.clone())
.vectors_config(VectorParamsBuilder::new(self.vector_size, Distance::Cosine)),
)
.await?;
Ok(())
}
}

impl QdrantBuilder {
    /// Builds the default client used when no client was set on the builder.
    ///
    /// The URL is read from the `QDRANT_URL` environment variable, falling back to
    /// `DEFAULT_QDRANT_URL` when that variable cannot be read. The result of reading
    /// `QDRANT_API_KEY` is handed to the client configuration as the API key.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying qdrant client cannot be built.
    fn default_client(&self) -> Result<Arc<qdrant_client::Qdrant>> {
        // Only allocate the fallback URL when `QDRANT_URL` is actually unavailable.
        let url =
            std::env::var("QDRANT_URL").unwrap_or_else(|_| DEFAULT_QDRANT_URL.to_string());

        let client = qdrant_client::Qdrant::from_url(&url)
            .api_key(std::env::var("QDRANT_API_KEY"))
            .build()
            .context("Could not build default qdrant client")?;

        Ok(Arc::new(client))
    }
}

impl std::fmt::Debug for Qdrant {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Qdrant")
Expand Down
18 changes: 15 additions & 3 deletions swiftide/src/integrations/qdrant/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
use anyhow::Result;
use async_trait::async_trait;
use qdrant_client::qdrant::UpsertPointsBuilder;

use crate::{
ingestion::{IngestionNode, IngestionStream},
Expand Down Expand Up @@ -54,8 +55,14 @@ impl Persist for Qdrant {
#[tracing::instrument(skip_all, err, name = "storage.qdrant.store")]
async fn store(&self, node: crate::ingestion::IngestionNode) -> Result<IngestionNode> {
let point = node.clone().try_into()?;

tracing::debug!(?node, ?point, "Storing node");

self.client
.upsert_points_blocking(self.collection_name.to_string(), None, vec![point], None)
.upsert_points(UpsertPointsBuilder::new(
self.collection_name.to_string(),
vec![point],
))
.await?;
Ok(node)
}
Expand Down Expand Up @@ -86,15 +93,20 @@ impl Persist for Qdrant {

let points = points.unwrap();

tracing::debug!("Storing batch of {} nodes", points.len());

let result = self
.client
.upsert_points_blocking(self.collection_name.to_string(), None, points, None)
.upsert_points(UpsertPointsBuilder::new(
self.collection_name.to_string(),
points,
))
.await;

if result.is_ok() {
IngestionStream::iter(nodes.into_iter().map(Ok))
} else {
vec![Err(result.unwrap_err())].into()
vec![Err(result.unwrap_err().into())].into()
}
}
}
25 changes: 12 additions & 13 deletions swiftide/tests/ingestion_pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! The tests validate the functionality of the pipeline, ensuring it processes data correctly
//! from a temporary file, simulates API responses, and stores data accurately in the Qdrant vector database.
use qdrant_client::qdrant::{SearchPointsBuilder, Value};
use serde_json::json;
use swiftide::{ingestion::IngestionPipeline, loaders::FileLoader, *};
use temp_dir::TempDir;
Expand Down Expand Up @@ -66,7 +67,7 @@ async fn test_ingestion_pipeline() {
"data": [
{
"object": "embedding",
"embedding": vec![0; 1536],
"embedding": vec![0; 1536],
"index": 0
}
],
Expand Down Expand Up @@ -174,20 +175,18 @@ async fn test_ingestion_pipeline() {

result.expect("Ingestion pipeline failed");

use qdrant_client::prelude::*;
let qdrant_client = QdrantClient::from_url(&qdrant_url).build().unwrap();
let search_result = qdrant_client
.search_points(&SearchPoints {
collection_name: "swiftide-test".to_string(),
vector: vec![0_f32; 1536],
limit: 10,
with_payload: Some(true.into()),
..Default::default()
})
.await
let qdrant_client = qdrant_client::Qdrant::from_url(&qdrant_url)
.build()
.unwrap();

let first = search_result.result.first().unwrap();
let search_request =
SearchPointsBuilder::new("swiftide-test", vec![0_f32; 1536], 10).with_payload(true);

let search_response = qdrant_client.search_points(search_request).await.unwrap();

dbg!(&search_response);

let first = search_response.result.first().unwrap();

assert!(first
.payload
Expand Down

0 comments on commit 353cd9e

Please sign in to comment.