fix memory leak problems with cuszx

danlkv · Dec 16, 2023 · d66c90b
1 parent 12e21f8
commit d66c90b
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 28 deletions.
diff --git a/qtensor/compression/CompressedTensor.py b/qtensor/compression/CompressedTensor.py
@@ -4,8 +4,9 @@
 from qtree.system_defs import NP_ARRAY_TYPE
 from .Compressor import NumpyCompressor, Compressor
 
+
 def iterate_indices(indices: list):
-    if len(indices)==0:
+    if len(indices) == 0:
         return [tuple()]
     ranges = [range(v.size) for v in indices]
     return itertools.product(*ranges)
@@ -18,11 +19,16 @@ class CompressedTensor(Tensor):
     The data array is split along several indices S into 2^|S| parts
 
     """
-    def __init__(self, name, indices,
-                 data_key=None, data=None,
-                 slice_indices=[],
-                 compressor:Compressor=NumpyCompressor()
-                ):
+
+    def __init__(
+        self,
+        name,
+        indices,
+        data_key=None,
+        data=None,
+        slice_indices=[],
+        compressor: Compressor = NumpyCompressor(),
+    ):
         """
         Initialize the tensor
         name: str,
@@ -49,7 +55,14 @@ def __init__(self, name, indices,
             self._dtype = None
 
     @classmethod
-    def empty(cls, name, indices, slice_indices=[], compressor=NumpyCompressor(), dtype:type=NP_ARRAY_TYPE):
+    def empty(
+        cls,
+        name,
+        indices,
+        slice_indices=[],
+        compressor=NumpyCompressor(),
+        dtype: type = NP_ARRAY_TYPE,
+    ):
         t = super().empty(name, indices, dtype)
         t.compressor = compressor
         if slice_indices:
@@ -63,18 +76,14 @@ def compress_indices(self, indices: list):
 
         Does not support compressing when already compressed
         """
-        slice_dict = {
-            i: slice(None) for i in self.indices
-        }
+        slice_dict = {i: slice(None) for i in self.indices}
         data_chunks = []
         for ivals in iterate_indices(indices):
             for ix, ival in zip(indices, ivals):
-                slice_dict[ix] = ival# slice(ival, ival+1)
+                slice_dict[ix] = ival  # slice(ival, ival+1)
             dslice = self.data[tuple(slice_dict[i] for i in self.indices)]
 
-            data_chunks.append(
-                self.compressor.compress(dslice)
-            )
+            data_chunks.append(self.compressor.compress(dslice))
             del dslice
         self._data = data_chunks
         self.slice_indices = indices
@@ -92,7 +101,7 @@ def array_indices(self):
 
     def get_chunk(self, ivals):
         dims = [v.size for v in self.slice_indices]
-        if len(ivals)==0:
+        if len(ivals) == 0:
             flat_ix = 0
         else:
             flat_ix = np.ravel_multi_index(ivals, dims)
@@ -104,13 +113,15 @@ def set_chunk(self, ivals, chunk: np.ndarray):
         if self._dtype is None:
             self._dtype = chunk.dtype
         else:
-            assert self.dtype == chunk.dtype, f"Chunk dtype {chunk.dtype} does not match tensor dtype {self.dtype}"
+            assert (
+                self.dtype == chunk.dtype
+            ), f"Chunk dtype {chunk.dtype} does not match tensor dtype {self.dtype}"
         # --
 
         if self._data is None:
-            self._data = np.empty(2**len(self.slice_indices), dtype=object)
+            self._data = np.empty(2 ** len(self.slice_indices), dtype=object)
         dims = [v.size for v in self.slice_indices]
-        if len(ivals)==0:
+        if len(ivals) == 0:
             flat_ix = 0
         else:
             flat_ix = np.ravel_multi_index(ivals, dims)
@@ -138,17 +149,19 @@ def __getitem__(self, key):
         chunk_slice = chunk[tuple(chunk_slices_ints)]
         return Tensor(new_name, new_indices, data=chunk_slice)
 
-
     def __str__(self):
-        array_ix = ','.join(map(str, self.array_indices))
-        split_ix= ','.join(map(str, self.slice_indices))
-        return f'{self._name}{{{split_ix}}}({array_ix})'
+        array_ix = ",".join(map(str, self.array_indices))
+        split_ix = ",".join(map(str, self.slice_indices))
+        return f"{self._name}{{{split_ix}}}({array_ix})"
 
     def copy(self, name=None, indices=None, data_key=None, data=None):
         raise NotImplementedError()
 
     def __repr__(self):
         return self.__str__()
 
-
-
+    def __del__(self):
+        if self._data is not None:
+            for chunk in self._data:
+                self.compressor.free_compressed(chunk)
+        del self
diff --git a/qtensor/compression/szx/src/cuszx_entry.cu b/qtensor/compression/szx/src/cuszx_entry.cu
@@ -898,6 +898,7 @@ size_t better_post_proc(size_t *outSize, float *oriData, unsigned char *meta,
     //outBytes = (unsigned char*)malloc(out_size);
 	unsigned char* r = outBytes;
     unsigned char* r_old = outBytes;
+    // Debug aid (disabled): cudaDeviceSynchronize(); printf("%s\n", cudaGetErrorString(cudaGetLastError()));
     checkCudaErrors(cudaMemset(r, SZx_VER_MAJOR, sizeof(char)));
     checkCudaErrors(cudaMemset(r+1, SZx_VER_MINOR, sizeof(char)));
     checkCudaErrors(cudaMemset(r+2, 1, sizeof(char)));

diff --git a/qtensor/compression/tests/test_memory_leak.py b/qtensor/compression/tests/test_memory_leak.py
@@ -64,7 +64,7 @@ def test_leak_contract():
     dtype = cupy.complex64
     dtype_size = dtype(0).nbytes
     MB_elems = int(1024**2 / dtype_size)
-    MB_target = 128 # target for largest tensor
+    MB_target = 64  # target for largest tensor
     N = MB_target * MB_elems
     W_target = int(np.log2(N))
     print(f"== Testing memory leak with {N} elements and {MB_target} MB array ==")
@@ -73,7 +73,7 @@ def test_leak_contract():
     _nvsmi_handle = _init_nvsmi()
 
     As, Bs = W_target - 4, W_target - 2
-    common_num = int((As + Bs - W_target)/2)
+    common_num = int((As + Bs - W_target) / 2)
     print(f"Common indices: {common_num}, W_target: {W_target}")
     avars = [Var(i) for i in range(As)]
     bvars = [Var(i) for i in range(common_num)] + [
@@ -82,20 +82,27 @@ def test_leak_contract():
     print("A vars", avars)
     print("B vars", bvars)
     TA = Tensor.empty("A", avars)
+    TA.data = np.random.rand(*TA.shape).astype(dtype)
     TB = Tensor.empty("B", bvars)
+    TB.data = np.random.rand(*TB.shape).astype(dtype)
 
+    _mem_histories = []
     for j in range(100):
         res = compressed_contract(
             TA,
             TB,
             avars[:common_num],
-            W_target,
+            W_target - 1,
             c,
             einsum=cupy.einsum,
             move_data=cupy.array,
         )
+        [c.free_compressed(x) for x in res.data]
         print(f"Result indices: {res.indices}")
         print(f"Result: {res}")
+        _mem = _get_nvsmi_mem(_nvsmi_handle) / 1024**3
+        print(f"== [{j}] Memory usage: {_mem} GB ==")
+        _mem_histories.append(_mem)
         print(
-            f"== [{j}] Memory usage: {_get_nvsmi_mem(_nvsmi_handle) / 1024 ** 3} GB =="
+            f"== [{j}] Memory history: {[np.round(x, 2) for x in _mem_histories]} GB =="
         )