chore!: remove opq related code (#2322)
BREAKING CHANGE: remove OPQ implementation
eddyxu authored May 10, 2024
1 parent 79de380 · commit 33d576f
Showing 18 changed files with 19 additions and 428 deletions.
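
For downstream users, the breaking change is a narrower index-building API: the use_opq and max_opq_iterations keyword arguments disappear from Python's create_index, and the use_opq boolean argument is dropped from VectorIndexParams::ivf_pq in Rust, as the diffs below show. A minimal sketch of the updated Rust call, based on the benchmark changes in rust/lance/benches/ivf_pq.rs; the import paths, variable names, and parameter meanings (IVF partitions, PQ bits, PQ sub-vectors, metric, k-means iterations) are assumptions, not taken from this commit:

    use lance::index::vector::VectorIndexParams; // assumed path
    use lance_linalg::distance::MetricType;      // assumed path

    let nlist = 256; // number of IVF partitions (hypothetical value)
    let nsub = 16;   // number of PQ sub-vectors (hypothetical value)

    // Before (removed): VectorIndexParams::ivf_pq(nlist, 8, nsub, false, MetricType::L2, 50);
    // After: the OPQ toggle is simply gone; the remaining arguments are unchanged.
    let params = VectorIndexParams::ivf_pq(nlist, 8, nsub, MetricType::L2, 50);
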
6 changes: 0 additions & 6 deletions python/python/lance/dataset.py
@@ -1303,12 +1303,6 @@ def create_index(
num_sub_vectors
Optional parameters for "IVF_PQ":
use_opq : bool
whether to use OPQ (Optimized Product Quantization).
Must have feature 'opq' enabled in Rust.
max_opq_iterations : int
the maximum number of iterations for training OPQ.
ivf_centroids :
K-mean centroids for IVF clustering.
14 changes: 0 additions & 14 deletions python/src/dataset.rs
@@ -1301,16 +1301,6 @@ fn prepare_vector_index_params(
pq_params.num_sub_vectors = PyAny::downcast::<PyInt>(n)?.extract()?
};

if let Some(o) = kwargs.get_item("use_opq")? {
#[cfg(not(feature = "opq"))]
if PyAny::downcast::<PyBool>(o)?.extract()? {
return Err(PyValueError::new_err(
"Feature 'opq' is not installed.".to_string(),
));
}
pq_params.use_opq = PyAny::downcast::<PyBool>(o)?.extract()?
};

if let Some(c) = kwargs.get_item("pq_codebook")? {
let batch = RecordBatch::from_pyarrow(c)?;
if "_pq_codebook" != batch.schema().field(0).name() {
@@ -1321,10 +1311,6 @@ fn prepare_vector_index_params(
let codebook = as_fixed_size_list_array(batch.column(0));
pq_params.codebook = Some(codebook.values().clone())
};

if let Some(o) = kwargs.get_item("max_opq_iterations")? {
pq_params.max_opq_iters = PyAny::downcast::<PyInt>(o)?.extract()?
};
}

match index_type {
9 changes: 0 additions & 9 deletions rust/lance-index/src/vector/pq/builder.rs
@@ -35,16 +35,9 @@ pub struct PQBuildParams {
/// The number of bits to present one PQ centroid.
pub num_bits: usize,

/// Train as optimized product quantization.
pub use_opq: bool,

/// The max number of iterations for kmeans training.
pub max_iters: usize,

/// Max number of iterations to train Optimized Product Quantization,
/// if `use_opq` is true.
pub max_opq_iters: usize,

/// User provided codebook.
pub codebook: Option<ArrayRef>,

@@ -57,9 +50,7 @@ impl Default for PQBuildParams {
Self {
num_sub_vectors: 16,
num_bits: 8,
use_opq: false,
max_iters: 50,
max_opq_iters: 50,
codebook: None,
sample_rate: 256,
}
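
With the use_opq and max_opq_iters fields gone, PQBuildParams carries only the PQ-specific knobs listed above. A minimal sketch of constructing it with struct-update syntax, mirroring the pattern in rust/lance/benches/vector_index.rs further down; the import path is inferred from the file location and may differ:

    use lance_index::vector::pq::PQBuildParams; // assumed re-export path

    // Only PQ options remain; the defaults come from the Default impl shown above.
    let pq_params = PQBuildParams {
        num_sub_vectors: 16,
        num_bits: 8,
        ..Default::default() // max_iters: 50, codebook: None, sample_rate: 256
    };
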
102 changes: 1 addition & 101 deletions rust/lance-linalg/src/matrix.rs
@@ -217,63 +217,6 @@ impl<T: ArrowFloatType> MatrixView<T> {
// todo!("normalize")
}

/// Dot multiply
#[cfg(feature = "opq")]
pub fn dot(&self, rhs: &Self) -> Result<Self> {
use cblas::{sgemm, Layout, Transpose};

let m = self.num_rows() as i32;
let k = self.num_columns() as i32;
let n = rhs.num_columns() as i32;
if self.num_columns() != rhs.num_rows() {
return Err(Error::Arrow {
message: format!(
"MatMul dimension mismatch: A({m}x{k}) * B({}x{n}",
rhs.num_rows()
),
});
}

let mut c_builder = Float32Builder::with_capacity((m * n) as usize);
unsafe { c_builder.append_trusted_len_iter((0..n * m).map(|_| 0.0)) }

let (trans_a, lda) = if self.transpose {
(Transpose::Ordinary, m)
} else {
(Transpose::None, k)
};
let (trans_b, ldb) = if rhs.transpose {
(Transpose::Ordinary, k)
} else {
(Transpose::None, n)
};
unsafe {
sgemm(
Layout::RowMajor,
trans_a,
trans_b,
m,
n,
k,
1.0,
self.data.values(),
lda,
rhs.data.values(),
ldb,
0.0,
c_builder.values_slice_mut(),
n,
)
}

let data = Arc::new(c_builder.finish());
Ok(Self {
data,
num_columns: n as usize,
transpose: false,
})
}

/// Sample `n` rows from the matrix.
pub fn sample(&self, n: usize) -> Self {
let rng = SmallRng::from_entropy();
@@ -380,54 +323,11 @@ impl<'a, T: ArrowFloatType> Iterator for MatrixRowIter<'a, T> {
mod tests {
use std::collections::HashSet;

use arrow_array::Float32Array;

#[cfg(feature = "opq")]
use approx::assert_relative_eq;
use arrow_array::types::{Float32Type, Float64Type};
use arrow_array::Float32Array;

use super::*;

#[test]
#[cfg(feature = "opq")]
fn test_matrix_dot() {
// A[2,3]
let a_data = Arc::new(Float32Array::from_iter((1..=6).map(|v| v as f32)));
let a = MatrixView::new(a_data, 3);

// B[3,2]
let b_data = Arc::new(Float32Array::from_iter_values([
2.0, 3.0, 6.0, 7.0, 10.0, 11.0,
]));
let b = MatrixView::new(b_data, 2);

let c = a.dot(&b).unwrap();
let expected = vec![44.0, 50.0, 98.0, 113.0];
c.data.values().iter().zip(expected).for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});
}

#[test]
#[cfg(feature = "opq")]
fn test_dot_on_transposed_mat() {
// A[2,3]
let a_data = Arc::new(Float32Array::from_iter((1..=6).map(|v| v as f32)));
let a = MatrixView::<Float32Array>::new(a_data, 3);

// B[3,2]
let b_data = Arc::new(Float32Array::from_iter_values([
2.0, 3.0, 6.0, 7.0, 10.0, 11.0,
]));
let b = MatrixView::<Float32Array>::new(b_data, 2);

let c_t = b.transpose().dot(&a.transpose()).unwrap();
let expected = vec![44.0, 98.0, 50.0, 113.0];
c_t.data.values().iter().zip(expected).for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});
}

#[test]
fn test_sample_matrix() {
let a_data = Arc::new(Float32Array::from_iter((1..=20).map(|v| v as f32)));
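
The deleted dot method computed C = A * B for row-major f32 matrices through BLAS sgemm (only when the now-removed opq feature was enabled), and the deleted tests checked a 2x3 by 3x2 product. As a sanity check on those expected values, here is a dependency-free sketch of the same multiplication with plain nested loops; the helper name is hypothetical and not part of the crate:

    // Naive row-major matrix multiply: c (m x n) = a (m x k) * b (k x n).
    fn matmul(a: &[f32], b: &[f32], m: usize, k: usize, n: usize) -> Vec<f32> {
        let mut c = vec![0.0f32; m * n];
        for i in 0..m {
            for j in 0..n {
                for l in 0..k {
                    c[i * n + j] += a[i * k + l] * b[l * n + j];
                }
            }
        }
        c
    }

    fn main() {
        // Same inputs as the removed test_matrix_dot: A is 2x3 with values 1..=6, B is 3x2.
        let a: Vec<f32> = (1..=6).map(|v| v as f32).collect();
        let b: Vec<f32> = vec![2.0, 3.0, 6.0, 7.0, 10.0, 11.0];
        assert_eq!(matmul(&a, &b, 2, 3, 2), vec![44.0, 50.0, 98.0, 113.0]);
    }
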
5 changes: 2 additions & 3 deletions rust/lance/benches/ivf_pq.rs
@@ -84,7 +84,7 @@ fn bench_ivf_pq_index(c: &mut Criterion) {
|b| {
b.to_async(&rt).iter(|| async {
let params =
VectorIndexParams::ivf_pq(ivf_partition, 8, pq, false, MetricType::Cosine, 50);
VectorIndexParams::ivf_pq(ivf_partition, 8, pq, MetricType::Cosine, 50);

dataset
.clone()
@@ -105,8 +105,7 @@ fn bench_ivf_pq_index(c: &mut Criterion) {
format!("CreateIVF{},PQ{}(d={},metric=l2)", ivf_partition, pq, DIM).as_str(),
|b| {
b.to_async(&rt).iter(|| async {
let params =
VectorIndexParams::ivf_pq(ivf_partition, 8, pq, false, MetricType::L2, 50);
let params = VectorIndexParams::ivf_pq(ivf_partition, 8, pq, MetricType::L2, 50);

dataset
.clone()
1 change: 0 additions & 1 deletion rust/lance/benches/vector_index.rs
@@ -169,7 +169,6 @@ async fn create_file(path: &std::path::Path, mode: WriteMode) {
let pq_params = PQBuildParams {
num_bits: 8,
num_sub_vectors: 16,
use_opq: false,
..Default::default()
};
let m_type = MetricType::L2;
2 changes: 0 additions & 2 deletions rust/lance/src/arrow.rs
@@ -10,5 +10,3 @@ pub use lance_arrow::schema::*;
pub use lance_arrow::*;

pub mod json;
#[cfg(feature = "opq")]
pub(crate) mod svd;