Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cannot open shared object file: No such file or directory #238

Open
abdelkareemkobo opened this issue Aug 15, 2024 · 2 comments
Open

cannot open shared object file: No such file or directory #238

abdelkareemkobo opened this issue Aug 15, 2024 · 2 comments

Comments

@abdelkareemkobo
Copy link

{
"name": "ImportError",
"message": ".cache/torch_extensions/py310_cu118/decompress_residuals_cpp/decompress_residuals_cpp.so: cannot open shared object file: No such file or directory",
"stack": "---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[4], line 1
----> 1 RAG.index(
2 collection=[full_document],
3 document_ids=['miyazaki'],
4 document_metadatas=[{"entity": "person", "source": "wikipedia"}],
5 index_name="Miyazaki",
6 max_document_length=180,
7 split_documents=True
8 )

File ~/mambaforge/lib/python3.10/site-packages/ragatouille/RAGPretrainedModel.py:211, in RAGPretrainedModel.index(self, collection, document_ids, document_metadatas, index_name, overwrite_index, max_document_length, split_documents, document_splitter_fn, preprocessing_fn, bsize, use_faiss)
202 document_splitter_fn = None
203 collection, pid_docid_map, docid_metadata_map = self._process_corpus(
204 collection,
205 document_ids,
(...)
209 max_document_length,
210 )
--> 211 return self.model.index(
212 collection,
213 pid_docid_map=pid_docid_map,
214 docid_metadata_map=docid_metadata_map,
215 index_name=index_name,
216 max_document_length=max_document_length,
217 overwrite=overwrite_index,
218 bsize=bsize,
219 use_faiss=use_faiss,
220 )

File ~/mambaforge/lib/python3.10/site-packages/ragatouille/models/colbert.py:341, in ColBERT.index(self, collection, pid_docid_map, docid_metadata_map, index_name, max_document_length, overwrite, bsize, use_faiss)
337 self.docid_pid_map[docid].append(pid)
339 self.docid_metadata_map = docid_metadata_map
--> 341 self.model_index = ModelIndexFactory.construct(
342 "PLAID",
343 self.config,
344 self.checkpoint,
345 self.collection,
346 self.index_name,
347 overwrite,
348 verbose=self.verbose != 0,
349 bsize=bsize,
350 use_faiss=use_faiss,
351 )
352 self.config = self.model_index.config
353 self._save_index_metadata()

File ~/mambaforge/lib/python3.10/site-packages/ragatouille/models/index.py:485, in ModelIndexFactory.construct(index_type, config, checkpoint, collection, index_name, overwrite, verbose, **kwargs)
482 if index_type == "auto":
483 # NOTE: For now only PLAID indexes are supported.
484 index_type = "PLAID"
--> 485 return ModelIndexFactory._MODEL_INDEX_BY_NAME[
486 ModelIndexFactory._raise_if_invalid_index_type(index_type)
487 ].construct(
488 config, checkpoint, collection, index_name, overwrite, verbose, **kwargs
489 )

File ~/mambaforge/lib/python3.10/site-packages/ragatouille/models/index.py:150, in PLAIDModelIndex.construct(config, checkpoint, collection, index_name, overwrite, verbose, **kwargs)
140 @staticmethod
141 def construct(
142 config: ColBERTConfig,
(...)
148 **kwargs,
149 ) -> "PLAIDModelIndex":
--> 150 return PLAIDModelIndex(config).build(
151 checkpoint, collection, index_name, overwrite, verbose, **kwargs
152 )

File ~/mambaforge/lib/python3.10/site-packages/ragatouille/models/index.py:254, in PLAIDModelIndex.build(self, checkpoint, collection, index_name, overwrite, verbose, **kwargs)
248 indexer = Indexer(
249 checkpoint=checkpoint,
250 config=self.config,
251 verbose=verbose,
252 )
253 indexer.configure(avoid_fork_if_possible=True)
--> 254 indexer.index(name=index_name, collection=collection, overwrite=overwrite)
256 return self

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexer.py:80, in Indexer.index(self, name, collection, overwrite)
77 self.erase()
79 if index_does_not_exist or overwrite != 'reuse':
---> 80 self.__launch(collection)
82 return self.index_path

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexer.py:89, in Indexer.__launch(self, collection)
87 shared_queues = []
88 shared_lists = []
---> 89 launcher.launch_without_fork(self.config, collection, shared_lists, shared_queues, self.verbose)
91 return
93 manager = mp.Manager()

File ~/mambaforge/lib/python3.10/site-packages/colbert/infra/launcher.py:93, in Launcher.launch_without_fork(self, custom_config, *args)
90 assert (custom_config.avoid_fork_if_possible or self.run_config.avoid_fork_if_possible)
92 new_config = type(custom_config).from_existing(custom_config, self.run_config, RunConfig(rank=0))
---> 93 return_val = run_process_without_mp(self.callee, new_config, *args)
95 return return_val

File ~/mambaforge/lib/python3.10/site-packages/colbert/infra/launcher.py:109, in run_process_without_mp(callee, config, *args)
106 os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, config.gpus_[:config.nranks]))
108 with Run().context(config, inherit_config=False):
--> 109 return_val = callee(config, *args)
110 torch.cuda.empty_cache()
111 return return_val

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexing/collection_indexer.py:33, in encode(config, collection, shared_lists, shared_queues, verbose)
31 def encode(config, collection, shared_lists, shared_queues, verbose: int = 3):
32 encoder = CollectionIndexer(config=config, collection=collection, verbose=verbose)
---> 33 encoder.run(shared_lists)

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexing/collection_indexer.py:68, in CollectionIndexer.run(self, shared_lists)
65 print_memory_stats(f'RANK:{self.rank}')
67 if not self.config.resume or not self.saver.try_load_codec():
---> 68 self.train(shared_lists) # Trains centroids from selected passages
69 distributed.barrier(self.rank)
70 print_memory_stats(f'RANK:{self.rank}')

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexing/collection_indexer.py:237, in CollectionIndexer.train(self, shared_lists)
234 print_memory_stats(f'RANK:{self.rank}')
235 del sample
--> 237 bucket_cutoffs, bucket_weights, avg_residual = self._compute_avg_residual(centroids, heldout)
239 if self.verbose > 1:
240 print_message(f'avg_residual = {avg_residual}')

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexing/collection_indexer.py:315, in CollectionIndexer._compute_avg_residual(self, centroids, heldout)
314 def _compute_avg_residual(self, centroids, heldout):
--> 315 compressor = ResidualCodec(config=self.config, centroids=centroids, avg_residual=None)
317 heldout_reconstruct = compressor.compress_into_codes(heldout, out_device='cuda' if self.use_gpu else 'cpu')
318 heldout_reconstruct = compressor.lookup_centroids(heldout_reconstruct, out_device='cuda' if self.use_gpu else 'cpu')

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexing/codecs/residual.py:24, in ResidualCodec.__init__(self, config, centroids, avg_residual, bucket_cutoffs, bucket_weights)
     21 def __init__(self, config, centroids, avg_residual=None, bucket_cutoffs=None, bucket_weights=None):
22 self.use_gpu = config.total_visible_gpus > 0
---> 24 ResidualCodec.try_load_torch_extensions(self.use_gpu)
26 if self.use_gpu > 0:
27 self.centroids = centroids.cuda().half()

File ~/mambaforge/lib/python3.10/site-packages/colbert/indexing/codecs/residual.py:103, in ResidualCodec.try_load_torch_extensions(cls, use_gpu)
100 return
102 print_message(f"Loading decompress_residuals_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...")
--> 103 decompress_residuals_cpp = load(
104 name="decompress_residuals_cpp",
105 sources=[
    106             os.path.join(
    107                 pathlib.Path(__file__).parent.resolve(), "decompress_residuals.cpp"
    108             ),
    109             os.path.join(
    110                 pathlib.Path(__file__).parent.resolve(), "decompress_residuals.cu"
    111             ),
112 ],
113 verbose=os.getenv("COLBERT_LOAD_TORCH_EXTENSION_VERBOSE", "False") == "True",
114 )
115 cls.decompress_residuals = decompress_residuals_cpp.decompress_residuals_cpp
117 print_message(f"Loading packbits_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...")

File ~/mambaforge/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1306, in load(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, is_standalone, keep_intermediates)
1214 def load(name,
1215 sources: Union[str, List[str]],
1216 extra_cflags=None,
(...)
1224 is_standalone=False,
1225 keep_intermediates=True):
1226 """
1227 Load a PyTorch C++ extension just-in-time (JIT).
1228
(...)
1304 ... verbose=True)
1305 """
-> 1306 return _jit_compile(
1307 name,
1308 [sources] if isinstance(sources, str) else sources,
1309 extra_cflags,
1310 extra_cuda_cflags,
1311 extra_ldflags,
1312 extra_include_paths,
1313 build_directory or _get_build_directory(name, verbose),
1314 verbose,
1315 with_cuda,
1316 is_python_module,
1317 is_standalone,
1318 keep_intermediates=keep_intermediates)

File ~/mambaforge/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1736, in _jit_compile(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, is_standalone, keep_intermediates)
1733 if is_standalone:
1734 return _get_exec_path(name, build_directory)
-> 1736 return _import_module_from_library(name, build_directory, is_python_module)

File ~/mambaforge/lib/python3.10/site-packages/torch/utils/cpp_extension.py:2132, in _import_module_from_library(module_name, path, is_python_module)
2130 spec = importlib.util.spec_from_file_location(module_name, filepath)
2131 assert spec is not None
-> 2132 module = importlib.util.module_from_spec(spec)
2133 assert isinstance(spec.loader, importlib.abc.Loader)
2134 spec.loader.exec_module(module)

File &lt;frozen importlib._bootstrap&gt;:571, in module_from_spec(spec)

File &lt;frozen importlib._bootstrap_external&gt;:1176, in create_module(self, spec)

File &lt;frozen importlib._bootstrap&gt;:241, in _call_with_frames_removed(f, *args, **kwds)

ImportError: /.cache/torch_extensions/py310_cu118/decompress_residuals_cpp/decompress_residuals_cpp.so: cannot open shared object file: No such file or directory"
}

@abdelkareemkobo
Copy link
Author

Solved it by creating a new environment with a fresh install of CUDA and PyTorch. I then ran into other CUDA-related errors, but it's working now.

@abdelkareemkobo
Copy link
Author

I ran into this again after 3 months and can't solve it this time :(

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant