Skip to content

Commit

Permalink
Update code to use csr_matrix and np.float64 for sparse vectors (#28)
Browse files Browse the repository at this point in the history
Fixes the bug that appeared with numpy 1.14.0.
See issue #25.
  • Loading branch information
alesaccoia authored Aug 7, 2024
1 parent 774db0c commit 692dc6c
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions milvus_model/hybrid/bge_m3.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
from typing import Dict, List

from scipy.sparse import csr_array, vstack
from scipy.sparse import csr_matrix, vstack
import numpy as np

from milvus_model.base import BaseEmbeddingFunction
from milvus_model.utils import import_FlagEmbedding
Expand Down Expand Up @@ -78,9 +79,9 @@ def _encode(self, texts: List[str]) -> Dict:
results["sparse"] = []
for sparse_vec in output["lexical_weights"]:
indices = [int(k) for k in sparse_vec]
values = list(sparse_vec.values())
values = np.array(list(sparse_vec.values()), dtype=np.float64)
row_indices = [0] * len(indices)
csr = csr_array((values, (row_indices, indices)), shape=(1, sparse_dim))
csr = csr_matrix((values, (row_indices, indices)), shape=(1, sparse_dim))
results["sparse"].append(csr)
results["sparse"] = vstack(results["sparse"]).tocsr()
if self._encode_config["return_colbert_vecs"] is True:
Expand Down

0 comments on commit 692dc6c

Please sign in to comment.