From 4f3b0d92bffa5610206766eeba44c8fa5dd74a5a Mon Sep 17 00:00:00 2001 From: dream-tentacle <127286614+dream-tentacle@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:36:24 +0800 Subject: [PATCH] correct return type for encoding --- FlagEmbedding/flag_models.py | 14 +++++++------- Tutorials/1_Embedding/1.2.1_BGE_Series.ipynb | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/FlagEmbedding/flag_models.py b/FlagEmbedding/flag_models.py index 57810b8c..bc93431b 100644 --- a/FlagEmbedding/flag_models.py +++ b/FlagEmbedding/flag_models.py @@ -237,7 +237,7 @@ def __init__( def encode_queries(self, queries: Union[List[str], str], batch_size: int = 256, max_length: int = 512, - convert_to_numpy: bool = True) -> np.ndarray: + convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]: ''' This function will be used for retrieval task if there is a instruction for queries, we will add it to the query text @@ -252,7 +252,7 @@ def encode_corpus(self, corpus: Union[List[str], str], batch_size: int = 256, max_length: int = 512, - convert_to_numpy: bool = True) -> np.ndarray: + convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]: ''' This function will be used for retrieval task encode corpus for retrieval task @@ -264,7 +264,7 @@ def encode(self, sentences: Union[List[str], str], batch_size: int = 256, max_length: int = 512, - convert_to_numpy: bool = True) -> np.ndarray: + convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]: if self.num_gpus > 0: batch_size = batch_size * self.num_gpus self.model.eval() @@ -341,7 +341,7 @@ def __init__( def encode_queries(self, queries: Union[List[str], str], batch_size: int = 256, max_length: int = 512, - convert_to_numpy: bool = True) -> np.ndarray: + convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]: ''' This function will be used for retrieval task if there is a instruction for queries, we will add it to the query text @@ -350,7 +350,7 @@ def encode_queries(self, 
queries: Union[List[str], str], if isinstance(queries, str): input_texts = self.query_instruction_for_retrieval + queries else: - input_texts = ['{}{}'.format(self.query_instruction_for_retrieval, q) for q in queries] + input_texts = [(self.query_instruction_for_retrieval + q) for q in queries] else: input_texts = queries return self.encode(input_texts, batch_size=batch_size, max_length=max_length, convert_to_numpy=convert_to_numpy) @@ -359,7 +359,7 @@ def encode_corpus(self, corpus: Union[List[str], str], batch_size: int = 256, max_length: int = 512, - convert_to_numpy: bool = True) -> np.ndarray: + convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]: ''' This function will be used for retrieval task encode corpus for retrieval task @@ -371,7 +371,7 @@ def encode(self, sentences: Union[List[str], str], batch_size: int = 256, max_length: int = 512, - convert_to_numpy: bool = True) -> np.ndarray: + convert_to_numpy: bool = True) -> Union[np.ndarray, torch.Tensor]: if self.num_gpus > 0: batch_size = batch_size * self.num_gpus self.model.eval() diff --git a/Tutorials/1_Embedding/1.2.1_BGE_Series.ipynb b/Tutorials/1_Embedding/1.2.1_BGE_Series.ipynb index 198228ca..81c95243 100644 --- a/Tutorials/1_Embedding/1.2.1_BGE_Series.ipynb +++ b/Tutorials/1_Embedding/1.2.1_BGE_Series.ipynb @@ -266,7 +266,7 @@ "```\n", "LLMEmbedder.encode_keys(keys, batch_size=256, max_length=512, task='qa')\n", "```\n", - "Similarly, *encode_keys()* also calls *_encode()* and automatically add instructions according to given task." + "Similarly, *encode_keys()* also calls *_encode()* and automatically adds instructions according to the given task." ] }, {