Skip to content

Commit

Permalink
fix memory leak problems with cuszx
Browse files Browse the repository at this point in the history
  • Loading branch information
danlkv committed Dec 16, 2023
1 parent 12e21f8 commit d66c90b
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 28 deletions.
61 changes: 37 additions & 24 deletions qtensor/compression/CompressedTensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
from qtree.system_defs import NP_ARRAY_TYPE
from .Compressor import NumpyCompressor, Compressor


def iterate_indices(indices: list):
    """
    Enumerate every combination of values of the given indices.

    indices: list
        index variables; each one must expose an integer ``size``

    Returns an iterable of tuples holding one value per index, with the
    last index varying fastest. For an empty ``indices`` the result is a
    list that contains only the empty tuple, so callers always get exactly
    one (empty) combination to loop over.
    """
    if len(indices) == 0:
        return [tuple()]
    ranges = [range(v.size) for v in indices]
    return itertools.product(*ranges)
Expand All @@ -18,11 +19,16 @@ class CompressedTensor(Tensor):
The data array is split along several indices S into 2^|S| parts
"""
def __init__(self, name, indices,
data_key=None, data=None,
slice_indices=[],
compressor:Compressor=NumpyCompressor()
):

def __init__(
self,
name,
indices,
data_key=None,
data=None,
slice_indices=[],
compressor: Compressor = NumpyCompressor(),
):
"""
Initialize the tensor
name: str,
Expand All @@ -49,7 +55,14 @@ def __init__(self, name, indices,
self._dtype = None

@classmethod
def empty(cls, name, indices, slice_indices=[], compressor=NumpyCompressor(), dtype:type=NP_ARRAY_TYPE):
def empty(
cls,
name,
indices,
slice_indices=[],
compressor=NumpyCompressor(),
dtype: type = NP_ARRAY_TYPE,
):
t = super().empty(name, indices, dtype)
t.compressor = compressor
if slice_indices:
Expand All @@ -63,18 +76,14 @@ def compress_indices(self, indices: list):
Does not support compressing when already compressed
"""
slice_dict = {
i: slice(None) for i in self.indices
}
slice_dict = {i: slice(None) for i in self.indices}
data_chunks = []
for ivals in iterate_indices(indices):
for ix, ival in zip(indices, ivals):
slice_dict[ix] = ival# slice(ival, ival+1)
slice_dict[ix] = ival # slice(ival, ival+1)
dslice = self.data[tuple(slice_dict[i] for i in self.indices)]

data_chunks.append(
self.compressor.compress(dslice)
)
data_chunks.append(self.compressor.compress(dslice))
del dslice
self._data = data_chunks
self.slice_indices = indices
Expand All @@ -92,7 +101,7 @@ def array_indices(self):

def get_chunk(self, ivals):
dims = [v.size for v in self.slice_indices]
if len(ivals)==0:
if len(ivals) == 0:
flat_ix = 0
else:
flat_ix = np.ravel_multi_index(ivals, dims)
Expand All @@ -104,13 +113,15 @@ def set_chunk(self, ivals, chunk: np.ndarray):
if self._dtype is None:
self._dtype = chunk.dtype
else:
assert self.dtype == chunk.dtype, f"Chunk dtype {chunk.dtype} does not match tensor dtype {self.dtype}"
assert (
self.dtype == chunk.dtype
), f"Chunk dtype {chunk.dtype} does not match tensor dtype {self.dtype}"
# --

if self._data is None:
self._data = np.empty(2**len(self.slice_indices), dtype=object)
self._data = np.empty(2 ** len(self.slice_indices), dtype=object)
dims = [v.size for v in self.slice_indices]
if len(ivals)==0:
if len(ivals) == 0:
flat_ix = 0
else:
flat_ix = np.ravel_multi_index(ivals, dims)
Expand Down Expand Up @@ -138,17 +149,19 @@ def __getitem__(self, key):
chunk_slice = chunk[tuple(chunk_slices_ints)]
return Tensor(new_name, new_indices, data=chunk_slice)


def __str__(self):
    """
    Return a short label of the form ``name{split indices}(array indices)``.

    The indices listed in braces are ``self.slice_indices``; the ones in
    parentheses are ``self.array_indices``. Both lists are comma-separated.
    """
    array_ix = ",".join(map(str, self.array_indices))
    split_ix = ",".join(map(str, self.slice_indices))
    return f"{self._name}{{{split_ix}}}({array_ix})"

def copy(self, name=None, indices=None, data_key=None, data=None):
    """
    Copying is not supported for this tensor type.

    The parameters are accepted but never used.

    Raises NotImplementedError on every call.
    """
    raise NotImplementedError(f"{type(self).__name__} does not support copy()")

def __repr__(self):
    """Return the same text as ``str(self)``."""
    return str(self)



def __del__(self):
    """
    Hand every stored chunk back to the compressor when the tensor dies.

    Each non-empty entry of ``self._data`` is passed to
    ``self.compressor.free_compressed``. Afterwards ``self._data`` is set
    to None, so running the finalizer again frees nothing.
    """
    # A finalizer also runs for objects whose __init__ raised part-way, so
    # neither attribute is guaranteed to exist.
    chunks = getattr(self, "_data", None)
    compressor = getattr(self, "compressor", None)
    if chunks is None or compressor is None:
        return
    # Drop our reference before freeing: a repeated call must not hand the
    # same chunks to the compressor twice.
    self._data = None
    # NOTE(review): this assumes _data holds compressed chunks; confirm what
    # free_compressed does if the tensor was never compressed.
    for chunk in chunks:
        # Chunk slots that were allocated but never filled are None and own
        # nothing that needs freeing.
        if chunk is not None:
            compressor.free_compressed(chunk)
1 change: 1 addition & 0 deletions qtensor/compression/szx/src/cuszx_entry.cu
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,7 @@ size_t better_post_proc(size_t *outSize, float *oriData, unsigned char *meta,
//outBytes = (unsigned char*)malloc(out_size);
unsigned char* r = outBytes;
unsigned char* r_old = outBytes;
// cudaDeviceSynchronize(); printf("%s\n",cudaGetLastError());
checkCudaErrors(cudaMemset(r, SZx_VER_MAJOR, sizeof(char)));
checkCudaErrors(cudaMemset(r+1, SZx_VER_MINOR, sizeof(char)));
checkCudaErrors(cudaMemset(r+2, 1, sizeof(char)));
Expand Down
15 changes: 11 additions & 4 deletions qtensor/compression/tests/test_memory_leak.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_leak_contract():
dtype = cupy.complex64
dtype_size = dtype(0).nbytes
MB_elems = int(1024**2 / dtype_size)
MB_target = 128 # target for largest tensor
MB_target = 64 # target for largest tensor
N = MB_target * MB_elems
W_target = int(np.log2(N))
print(f"== Testing memory leak with {N} elements and {MB_target} MB array ==")
Expand All @@ -73,7 +73,7 @@ def test_leak_contract():
_nvsmi_handle = _init_nvsmi()

As, Bs = W_target - 4, W_target - 2
common_num = int((As + Bs - W_target)/2)
common_num = int((As + Bs - W_target) / 2)
print(f"Common indices: {common_num}, W_target: {W_target}")
avars = [Var(i) for i in range(As)]
bvars = [Var(i) for i in range(common_num)] + [
Expand All @@ -82,20 +82,27 @@ def test_leak_contract():
print("A vars", avars)
print("B vars", bvars)
TA = Tensor.empty("A", avars)
TA.data = np.random.rand(*TA.shape).astype(dtype)
TB = Tensor.empty("B", bvars)
TB.data = np.random.rand(*TB.shape).astype(dtype)

_mem_histories = []
for j in range(100):
res = compressed_contract(
TA,
TB,
avars[:common_num],
W_target,
W_target - 1,
c,
einsum=cupy.einsum,
move_data=cupy.array,
)
[c.free_compressed(x) for x in res.data]
print(f"Result indices: {res.indices}")
print(f"Result: {res}")
_mem = _get_nvsmi_mem(_nvsmi_handle) / 1024**3
print(f"== [{j}] Memory usage: {_mem} GB ==")
_mem_histories.append(_mem)
print(
f"== [{j}] Memory usage: {_get_nvsmi_mem(_nvsmi_handle) / 1024 ** 3} GB =="
f"== [{j}] Memory history: {[np.round(x, 2) for x in _mem_histories]} GB =="
)

0 comments on commit d66c90b

Please sign in to comment.