chore!: remove opq related code (#2322)
BREAKING CHANGE: remove OPQ implementation
eddyxu authored May 10, 2024
1 parent 79de380 · commit 33d576f
Showing 18 changed files with 19 additions and 428 deletions.
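
For downstream users, the breaking change is a narrower index-building API: the use_opq and max_opq_iterations keyword arguments disappear from Python's create_index, and the use_opq boolean argument is dropped from VectorIndexParams::ivf_pq in Rust, as the diffs below show. A minimal sketch of the updated Rust call, based on the benchmark changes in rust/lance/benches/ivf_pq.rs; the import paths, variable names, and parameter meanings (IVF partitions, PQ bits, PQ sub-vectors, metric, k-means iterations) are assumptions, not taken from this commit:

    use lance::index::vector::VectorIndexParams; // assumed path
    use lance_linalg::distance::MetricType;      // assumed path

    let nlist = 256; // number of IVF partitions (hypothetical value)
    let nsub = 16;   // number of PQ sub-vectors (hypothetical value)

    // Before (removed): VectorIndexParams::ivf_pq(nlist, 8, nsub, false, MetricType::L2, 50);
    // After: the OPQ toggle is simply gone; the remaining arguments are unchanged.
    let params = VectorIndexParams::ivf_pq(nlist, 8, nsub, MetricType::L2, 50);
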
6 changes: 0 additions & 6 deletions python/python/lance/dataset.py
@@ -1303,12 +1303,6 @@ def create_index(
num_sub_vectors
Optional parameters for "IVF_PQ":
use_opq : bool
whether to use OPQ (Optimized Product Quantization).
Must have feature 'opq' enabled in Rust.
max_opq_iterations : int
the maximum number of iterations for training OPQ.
ivf_centroids :
K-mean centroids for IVF clustering.
14 changes: 0 additions & 14 deletions python/src/dataset.rs
@@ -1301,16 +1301,6 @@ fn prepare_vector_index_params(
pq_params.num_sub_vectors = PyAny::downcast::<PyInt>(n)?.extract()?
};

if let Some(o) = kwargs.get_item("use_opq")? {
#[cfg(not(feature = "opq"))]
if PyAny::downcast::<PyBool>(o)?.extract()? {
return Err(PyValueError::new_err(
"Feature 'opq' is not installed.".to_string(),
));
}
pq_params.use_opq = PyAny::downcast::<PyBool>(o)?.extract()?
};

if let Some(c) = kwargs.get_item("pq_codebook")? {
let batch = RecordBatch::from_pyarrow(c)?;
if "_pq_codebook" != batch.schema().field(0).name() {
@@ -1321,10 +1311,6 @@ fn prepare_vector_index_params(
let codebook = as_fixed_size_list_array(batch.column(0));
pq_params.codebook = Some(codebook.values().clone())
};

if let Some(o) = kwargs.get_item("max_opq_iterations")? {
pq_params.max_opq_iters = PyAny::downcast::<PyInt>(o)?.extract()?
};
}

match index_type {
9 changes: 0 additions & 9 deletions rust/lance-index/src/vector/pq/builder.rs
@@ -35,16 +35,9 @@ pub struct PQBuildParams {
/// The number of bits to present one PQ centroid.
pub num_bits: usize,

/// Train as optimized product quantization.
pub use_opq: bool,

/// The max number of iterations for kmeans training.
pub max_iters: usize,

/// Max number of iterations to train Optimized Product Quantization,
/// if `use_opq` is true.
pub max_opq_iters: usize,

/// User provided codebook.
pub codebook: Option<ArrayRef>,

@@ -57,9 +50,7 @@ impl Default for PQBuildParams {
Self {
num_sub_vectors: 16,
num_bits: 8,
use_opq: false,
max_iters: 50,
max_opq_iters: 50,
codebook: None,
sample_rate: 256,
}
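
With the use_opq and max_opq_iters fields gone, PQBuildParams carries only the PQ-specific knobs listed above. A minimal sketch of constructing it with struct-update syntax, mirroring the pattern in rust/lance/benches/vector_index.rs further down; the import path is inferred from the file location and may differ:

    use lance_index::vector::pq::PQBuildParams; // assumed re-export path

    // Only PQ options remain; the defaults come from the Default impl shown above.
    let pq_params = PQBuildParams {
        num_sub_vectors: 16,
        num_bits: 8,
        ..Default::default() // max_iters: 50, codebook: None, sample_rate: 256
    };
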
102 changes: 1 addition & 101 deletions rust/lance-linalg/src/matrix.rs
@@ -217,63 +217,6 @@ impl<T: ArrowFloatType> MatrixView<T> {
// todo!("normalize")
}

/// Dot multiply
#[cfg(feature = "opq")]
pub fn dot(&self, rhs: &Self) -> Result<Self> {
use cblas::{sgemm, Layout, Transpose};

let m = self.num_rows() as i32;
let k = self.num_columns() as i32;
let n = rhs.num_columns() as i32;
if self.num_columns() != rhs.num_rows() {
return Err(Error::Arrow {
message: format!(
"MatMul dimension mismatch: A({m}x{k}) * B({}x{n}",
rhs.num_rows()
),
});
}

let mut c_builder = Float32Builder::with_capacity((m * n) as usize);
unsafe { c_builder.append_trusted_len_iter((0..n * m).map(|_| 0.0)) }

let (trans_a, lda) = if self.transpose {
(Transpose::Ordinary, m)
} else {
(Transpose::None, k)
};
let (trans_b, ldb) = if rhs.transpose {
(Transpose::Ordinary, k)
} else {
(Transpose::None, n)
};
unsafe {
sgemm(
Layout::RowMajor,
trans_a,
trans_b,
m,
n,
k,
1.0,
self.data.values(),
lda,
rhs.data.values(),
ldb,
0.0,
c_builder.values_slice_mut(),
n,
)
}

let data = Arc::new(c_builder.finish());
Ok(Self {
data,
num_columns: n as usize,
transpose: false,
})
}

/// Sample `n` rows from the matrix.
pub fn sample(&self, n: usize) -> Self {
let rng = SmallRng::from_entropy();
@@ -380,54 +323,11 @@ impl<'a, T: ArrowFloatType> Iterator for MatrixRowIter<'a, T> {
mod tests {
use std::collections::HashSet;

use arrow_array::Float32Array;

#[cfg(feature = "opq")]
use approx::assert_relative_eq;
use arrow_array::types::{Float32Type, Float64Type};
use arrow_array::Float32Array;

use super::*;

#[test]
#[cfg(feature = "opq")]
fn test_matrix_dot() {
// A[2,3]
let a_data = Arc::new(Float32Array::from_iter((1..=6).map(|v| v as f32)));
let a = MatrixView::new(a_data, 3);

// B[3,2]
let b_data = Arc::new(Float32Array::from_iter_values([
2.0, 3.0, 6.0, 7.0, 10.0, 11.0,
]));
let b = MatrixView::new(b_data, 2);

let c = a.dot(&b).unwrap();
let expected = vec![44.0, 50.0, 98.0, 113.0];
c.data.values().iter().zip(expected).for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});
}

#[test]
#[cfg(feature = "opq")]
fn test_dot_on_transposed_mat() {
// A[2,3]
let a_data = Arc::new(Float32Array::from_iter((1..=6).map(|v| v as f32)));
let a = MatrixView::<Float32Array>::new(a_data, 3);

// B[3,2]
let b_data = Arc::new(Float32Array::from_iter_values([
2.0, 3.0, 6.0, 7.0, 10.0, 11.0,
]));
let b = MatrixView::<Float32Array>::new(b_data, 2);

let c_t = b.transpose().dot(&a.transpose()).unwrap();
let expected = vec![44.0, 98.0, 50.0, 113.0];
c_t.data.values().iter().zip(expected).for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});
}

#[test]
fn test_sample_matrix() {
let a_data = Arc::new(Float32Array::from_iter((1..=20).map(|v| v as f32)));
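
The deleted dot method computed C = A * B for row-major f32 matrices through BLAS sgemm (only when the now-removed opq feature was enabled), and the deleted tests checked a 2x3 by 3x2 product. As a sanity check on those expected values, here is a dependency-free sketch of the same multiplication with plain nested loops; the helper name is hypothetical and not part of the crate:

    // Naive row-major matrix multiply: c (m x n) = a (m x k) * b (k x n).
    fn matmul(a: &[f32], b: &[f32], m: usize, k: usize, n: usize) -> Vec<f32> {
        let mut c = vec![0.0f32; m * n];
        for i in 0..m {
            for j in 0..n {
                for l in 0..k {
                    c[i * n + j] += a[i * k + l] * b[l * n + j];
                }
            }
        }
        c
    }

    fn main() {
        // Same inputs as the removed test_matrix_dot: A is 2x3 with values 1..=6, B is 3x2.
        let a: Vec<f32> = (1..=6).map(|v| v as f32).collect();
        let b: Vec<f32> = vec![2.0, 3.0, 6.0, 7.0, 10.0, 11.0];
        assert_eq!(matmul(&a, &b, 2, 3, 2), vec![44.0, 50.0, 98.0, 113.0]);
    }
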
5 changes: 2 additions & 3 deletions rust/lance/benches/ivf_pq.rs
@@ -84,7 +84,7 @@ fn bench_ivf_pq_index(c: &mut Criterion) {
|b| {
b.to_async(&rt).iter(|| async {
let params =
VectorIndexParams::ivf_pq(ivf_partition, 8, pq, false, MetricType::Cosine, 50);
VectorIndexParams::ivf_pq(ivf_partition, 8, pq, MetricType::Cosine, 50);

dataset
.clone()
@@ -105,8 +105,7 @@ fn bench_ivf_pq_index(c: &mut Criterion) {
format!("CreateIVF{},PQ{}(d={},metric=l2)", ivf_partition, pq, DIM).as_str(),
|b| {
b.to_async(&rt).iter(|| async {
let params =
VectorIndexParams::ivf_pq(ivf_partition, 8, pq, false, MetricType::L2, 50);
let params = VectorIndexParams::ivf_pq(ivf_partition, 8, pq, MetricType::L2, 50);

dataset
.clone()
1 change: 0 additions & 1 deletion rust/lance/benches/vector_index.rs
@@ -169,7 +169,6 @@ async fn create_file(path: &std::path::Path, mode: WriteMode) {
let pq_params = PQBuildParams {
num_bits: 8,
num_sub_vectors: 16,
use_opq: false,
..Default::default()
};
let m_type = MetricType::L2;
2 changes: 0 additions & 2 deletions rust/lance/src/arrow.rs
@@ -10,5 +10,3 @@ pub use lance_arrow::schema::*;
pub use lance_arrow::*;

pub mod json;
#[cfg(feature = "opq")]
pub(crate) mod svd;