Cupybackend, mixed backends #37

Status: Open. This pull request wants to merge 59 commits into base: dev.
The file diff below shows changes from 26 of the 59 commits.

Commits (59):
3dc26db - add cutensor backend (Jul 6, 2021)
e7ef12b - add experiment group arg (Jul 6, 2021)
149e340 - update experiment group (Jul 7, 2021)
4477818 - update fixed size tensor contraction (Jul 9, 2021)
b2f4c71 - update measurement for tncontract (Jul 13, 2021)
7bfb3fe - update byte calculation (Jul 13, 2021)
f765aeb - restructure and change torch gen_tensor from cpu to gpu (sansangela, Jul 16, 2021)
3ac3a76 - restructure and change torch gen_tensor from cpu to gpu (Jul 16, 2021)
a0e73e7 - Merge branch 'matmul' of https://github.com/DaniloZZZ/QTensor into ma… (sansangela, Jul 19, 2021)
5a60560 - add bridge (sansangela, Jul 19, 2021)
c92f7e7 - random generate tensors (sansangela, Jul 20, 2021)
974a158 - add benchmark functions for matmul and tncontract (sansangela, Jul 21, 2021)
0660314 - remove get_operation (sansangela, Jul 21, 2021)
2d3964b - add random generate tensor (sansangela, Jul 23, 2021)
4659a7b - update ggen_sizes (sansangela, Jul 23, 2021)
0df2083 - add random tensor transpose (sansangela, Jul 26, 2021)
7a8b263 - add test (sansangela, Aug 4, 2021)
5c6c28a - update args init (sansangela, Aug 4, 2021)
bf01bd3 - add transpose backend (sansangela, Aug 10, 2021)
e44b1eb - add transposed backends (sansangela, Aug 10, 2021)
33049dc - update timing (sansangela, Aug 11, 2021)
dc16ef7 - add transpose backend & update timing (sansangela, Aug 11, 2021)
42c65bb - update dtype casting (sansangela, Aug 17, 2021)
1a88493 - Mixed BE Implemented (huaxuan250, Aug 18, 2021)
d56013d - Modified Json Format (huaxuan250, Aug 18, 2021)
eccbb37 - Higher Threshold and Complete (huaxuan250, Aug 18, 2021)
41b17a5 - Adding Numpy-Torch Conpatibility (huaxuan250, Aug 26, 2021)
f96f695 - Adding Watershed Control (huaxuan250, Aug 31, 2021)
fafb4f0 - Accurate Width Calculation (huaxuan250, Aug 31, 2021)
ddbfa59 - Auto Threshold Optimization (huaxuan250, Sep 3, 2021)
731a05b - Basic Performance Probing using 12 4 3 (huaxuan250, Sep 5, 2021)
c3c6d90 - Bris Benchmarking by Bucket (huaxuan250, Sep 8, 2021)
bdce309 - Updated Parameter (huaxuan250, Sep 8, 2021)
eaaabbf - merged backends (sansangela, Sep 8, 2021)
1889e3b - Mixed Merged Working (huaxuan250, Sep 11, 2021)
76ea8ff - Name Change (huaxuan250, Sep 14, 2021)
53635bb - QAOA Mix Merge Operational (huaxuan250, Sep 14, 2021)
31c1ed7 - String Parsing Fixed (huaxuan250, Sep 14, 2021)
0bf7215 - Bucket Iso (huaxuan250, Nov 17, 2021)
747b143 - Iso Done (huaxuan250, Nov 24, 2021)
74f3a33 - Probing Done (huaxuan250, Jan 5, 2022)
32e9b70 - Probing Result on V100 (huaxuan250, Jan 5, 2022)
d11c067 - Fitting Done (huaxuan250, Jan 12, 2022)
1c77a20 - Merge branch 'cupybackend' of https://github.com/DaniloZZZ/QTensor in… (huaxuan250, Jan 12, 2022)
b07d25d - Updated Result (huaxuan250, Jan 12, 2022)
c578a79 - 1.12 Result (huaxuan250, Jan 12, 2022)
075dce1 - Probing Test Metrics (huaxuan250, Jan 19, 2022)
a974fa9 - Merge branch 'cupybackend' of https://github.com/DaniloZZZ/QTensor in… (huaxuan250, Jan 19, 2022)
1fb42f7 - Probing Done with Tests (huaxuan250, Jan 19, 2022)
9f72abe - load detection (huaxuan250, Feb 2, 2022)
c78e4fb - Load Detect Focus on maxwidth (huaxuan250, Feb 9, 2022)
d0600e8 - Updated Thr control (huaxuan250, Feb 9, 2022)
3b02dd2 - Basic Util Completes (huaxuan250, Feb 23, 2022)
eaceb1d - CPU UTIL DONE FOR BASIC PROBING (huaxuan250, Feb 23, 2022)
b3015b6 - Example Code Done (huaxuan250, Feb 23, 2022)
fb359d2 - Change to Timing (huaxuan250, Feb 23, 2022)
cfb320b - test script for merged backend (danlkv, May 5, 2022)
87b3a37 - rename gpu_mix_test file (danlkv, May 5, 2022)
593b1d9 - add notebook with tensor compression tests (danlkv, May 6, 2022)
89 changes: 76 additions & 13 deletions qtensor/contraction_backends/__init__.py
@@ -4,38 +4,101 @@
 from .torch import TorchBackend
+from .cupy import CuPyBackend
 from .mkl import CMKLExtendedBackend
-from .cupy import CuPyBackend
 from .transposed import TransposedBackend
 from .opt_einsum import OptEinusmBackend
-from .performance_measurement_decorator import PerfNumpyBackend, PerfBackend
+from .transpose_backend import NumpyTranspoedBackend, TorchTransposedBackend, CupyTransposedBackend, CutensorTransposedBackend
+from .torch_mix import TorchMixBackend
+from .performance_measurement_decorator import PerfNumpyBackend, PerfBackend, GPUPerfBackend
+from .mix_decorator import MixBackend

 def get_backend(name):
     backend_dict = {
         'einsum':NumpyBackend,
-        'torch': TorchBackend,
-        'mkl':CMKLExtendedBackend,
-        'tr_einsum': TransposedBackend,
+        'torch_cpu': TorchBackend,
+        'torch_gpu': TorchBackend,
+        'mkl': CMKLExtendedBackend,
+        'tr_einsum': NumpyTranspoedBackend,
         'opt_einsum': OptEinusmBackend,
-        'cupy': CuPyBackend
+        'tr_torch': TorchTransposedBackend,
+        'cupy': CuPyBackend,
+        'tr_cupy': CupyTransposedBackend,
+        'tr_cutensor': CutensorTransposedBackend,
+        'torch_mix': TorchMixBackend
     }
-    if name in ["torch_gpu", "torch_cpu"]:
-        return backend_dict['torch'](device = name[-3:])
+    if name == "torch_cpu":
+        return backend_dict['torch_cpu'](device = "cpu")
     else:
        return backend_dict[name]()

-def get_perf_backend(name):
+def get_cpu_perf_backend(name):
     class MyPerfBackend(PerfBackend):
         Backend = {
             'einsum':NumpyBackend,
-            'torch': TorchBackend,
+            'torch_cpu': TorchBackend,
+            'torch_gpu': TorchBackend,
             'mkl':CMKLExtendedBackend,
-            'tr_einsum': TransposedBackend,
-            'opt_einsum': OptEinusmBackend,
+            'tr_einsum': NumpyTranspoedBackend,
[Review comment from the owner/author on the line above: typo in class name (NumpyTranspoedBackend).]
+            'opt_einsum': OptEinusmBackend,
         }[name]

+    if name == "torch_cpu":
+        return MyPerfBackend(device="cpu")
+    else:
+        return MyPerfBackend()

+def get_gpu_perf_backend(name):
+    class MyPerfBackend(GPUPerfBackend):
+        Backend = {
+            'torch_gpu': TorchBackend,
+            'cupy': CuPyBackend,
+            'tr_torch': TorchTransposedBackend,
+            'tr_cupy': CupyTransposedBackend,
+            'tr_cutensor': CutensorTransposedBackend
+        }[name]
+
-    if name in ["torch_gpu", "torch_cpu"]:
-        return MyPerfBackend(device = name[-3:])
+    if name == "torch_gpu":
+        return MyPerfBackend(device="gpu")
     else:
         return MyPerfBackend()



+# def get_mixed_backend(cpu_name, gpu_name):
+#     class MyMixedBackend(MixBackend):
+#         CBE = {
+#             'einsum':NumpyBackend,
+#             'torch_cpu': TorchBackend,
+#             'mkl':CMKLExtendedBackend,
+#             'opt_einsum': OptEinusmBackend,
+#             'tr_einsum': NumpyTranspoedBackend,
+#             'opt_einsum': OptEinusmBackend,
+#         }[cpu_name]
+#         GBE = {
+#             'torch_gpu': TorchBackend,
+#             'cupy': CuPyBackend,
+#             'tr_torch': TorchTransposedBackend,
+#             'tr_cupy': CupyTransposedBackend,
+#             'tr_cutensor': CutensorTransposedBackend
+#         }[gpu_name]
+
+#     if cpu_name == "torch_cpu":
+#         return MyMixedBackend(device = "cpu")
+#     else:
+#         return MyMixedBackend()
+
+def get_mixed_backend(cpu_name, gpu_name):
+    cpu_be = get_backend(cpu_name)
+    gpu_be = get_backend(gpu_name)
+    return MixBackend(cpu_be, gpu_be)
+
+
+def get_mixed_perf_backend(cpu_name, gpu_name):
+    cpu_be = get_cpu_perf_backend(cpu_name)
+    gpu_be = get_gpu_perf_backend(gpu_name)
+    return MixBackend(cpu_be, gpu_be)
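For reference, a minimal usage sketch of the selector functions added above (my own example, assuming a CUDA-capable machine with cupy installed; the backend names are the dictionary keys from this diff):

    from qtensor.contraction_backends import (
        get_backend, get_mixed_backend, get_mixed_perf_backend,
    )

    cpu_be = get_backend('einsum')            # numpy einsum on CPU
    gpu_be = get_backend('cupy')              # cupy einsum on GPU
    mixed = get_mixed_backend('einsum', 'cupy')
    perf = get_mixed_perf_backend('torch_cpu', 'torch_gpu')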

11 changes: 8 additions & 3 deletions qtensor/contraction_backends/cupy.py
@@ -1,5 +1,6 @@
 import qtree
 from qtensor.tools.lazy_import import cupy as cp
+import numpy as np
 from qtensor.contraction_backends import ContractionBackend

 mempool = mempool = cp.get_default_memory_pool()
@@ -20,8 +21,8 @@ def process_bucket(self, bucket, no_sum=False):
            '''
            Change 1: Using cp.einsum not torch.einsum
            '''
-           result_data = cp.einsum(expr, result_data, tensor.data)
-
+           result_data = cp.einsum(expr, cp.asarray(result_data), cp.asarray(tensor.data))
+           #print("result data: ", type(result_data))
            # Merge and sort indices and shapes
            result_indices = tuple(sorted(
                set(result_indices + tensor.indices),
@@ -105,4 +106,8 @@ def get_sliced_buckets(self, buckets, data_dict, slice_dict):
         return sliced_buckets

     def get_result_data(self, result):
-        return result.data
+        #print(type(result.data))
+        try:
+            return result.data.get()
+        except:
+            return result.data
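The asarray/get pair above is the standard CuPy host-device round trip: cp.asarray copies host data to the GPU (and is a no-op for arrays already there), while .get() copies the result back as a numpy array. A small self-contained sketch (plain cupy import, outside the repo's lazy-import wrapper):

    import numpy as np
    import cupy as cp

    a = np.ones((2, 2))
    d = cp.asarray(a)                    # host -> device (no copy if already a cupy array)
    r = cp.einsum('ij,jk->ik', d, d)     # contraction runs on the GPU
    back = r.get()                       # device -> host, returns numpy.ndarray
    assert isinstance(back, np.ndarray)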
67 changes: 67 additions & 0 deletions qtensor/contraction_backends/mix_decorator.py
@@ -0,0 +1,67 @@
from qtensor.contraction_backends import ContractionBackend
"""
class MixedBe(ConBE):
    be1: cpu_be
    be2: gpu_be

    def get_sliced_bucket():
        normal slicing;
        either use be1's get_sliced or a naive implementation;
        np.array for convenience

    def process_bucket():
        - check the input bucket width
        - if larger than 8, use gpu_be.process(bucket)
        - else, use cpu_be.process(bucket)

    def get_result_data():
        - always use gpu_be's get_result,
        - so that the gpu_be handles the gpu-cpu transfer all the time
"""


'''
Input: array of either (indices) tuples or tensors.
If a tensor, use t.indices for the width.
'''
def bucketWidth(bucket):
    bucket_width = 0
    for tensor in bucket:
        if isinstance(tensor, tuple):
            tensor_len = len(tensor)
        else:
            tensor_len = len(tensor.indices)
        if tensor_len > bucket_width:
            bucket_width = tensor_len
    return bucket_width
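A quick illustration of the width computation, using a hypothetical stand-in for a qtree tensor (only the indices attribute matters here):

    from collections import namedtuple

    FakeTensor = namedtuple('FakeTensor', 'indices')   # hypothetical stand-in
    bucket = [FakeTensor(indices=(0, 1)), FakeTensor(indices=(0, 1, 2))]
    assert bucketWidth(bucket) == 3   # width = largest index count in the bucket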




'''
I/O: actual backend objects -> wrapped class
'''
class MixBackend(ContractionBackend):

    def __init__(self, cpu_be, gpu_be):
        self.cpu_be = cpu_be
        self.gpu_be = gpu_be

    def process_bucket(self, bucket, no_sum=False):
        bucket_width = bucketWidth(bucket)
        if bucket_width >= 11:
            #print("In GPU")
            return self.gpu_be.process_bucket(bucket, no_sum)
        else:
            return self.cpu_be.process_bucket(bucket, no_sum)

    def get_sliced_buckets(self, buckets, data_dict, slice_dict):
        return self.cpu_be.get_sliced_buckets(buckets, data_dict, slice_dict)

    def get_result_data(self, result):
        return self.gpu_be.get_result_data(result)
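A minimal sketch of how this decorator is meant to be wired up, using only names introduced in this PR (the width threshold of 11 is hard-coded above):

    from qtensor.contraction_backends import get_backend
    from qtensor.contraction_backends.mix_decorator import MixBackend

    mix = MixBackend(get_backend('einsum'), get_backend('cupy'))
    # Buckets of width >= 11 are contracted by the cupy backend, narrower
    # buckets by the numpy backend; results always come back to the host
    # through the GPU backend's get_result_data.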
10 changes: 10 additions & 0 deletions qtensor/contraction_backends/numpy.py
@@ -33,6 +33,16 @@ def __init__(self):
         #self.status_bar = tqdm(desc='Current status', position=3, bar_format='{desc}')

     def process_bucket(self, bucket, no_sum=False):
+        '''
+        TODO: preprocess the bucket to make sure all data are in numpy format
+        '''
+        for tensor in bucket:
+            if not isinstance(tensor._data, np.ndarray):
[Review comment from the owner/author on the line above: Is it possible to do if isinstance(tensor._data, cupy.ndarray)?]

+                try:
+                    tensor._data = tensor._data.cpu().numpy()
+                except:
+                    pass

         res = np_framework.process_bucket_np(bucket, no_sum=no_sum)
         return res
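The loop above relies on torch's tensor-to-numpy conversion: torch tensors expose .cpu().numpy(), while other array types do not, hence the try/except fallback. A standalone sketch of that conversion (standard PyTorch API):

    import numpy as np
    import torch

    t = torch.ones(2, 2)          # possibly a GPU tensor in the real pipeline
    arr = t.cpu().numpy()         # move to host, then view as a numpy array
    assert isinstance(arr, np.ndarray)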
19 changes: 19 additions & 0 deletions qtensor/contraction_backends/performance_measurement_decorator.py
@@ -1,6 +1,7 @@
 from qtensor.contraction_backends import ContractionBackend, NumpyBackend
 from qtensor.utils import ReportTable
 from pyrofiler import timing
+import torch

 class PerfBackend(ContractionBackend):
     Backend = ContractionBackend
@@ -120,3 +121,21 @@ def gen_report(self, show = True):

 class PerfNumpyBackend(PerfBackend):
     Backend = NumpyBackend


+class GPUPerfBackend(PerfBackend):
+    def process_bucket(self, bucket, no_sum=False):
+        indices = [tensor.indices for tensor in bucket]
+
+        start = torch.cuda.Event(enable_timing=True)
+        end = torch.cuda.Event(enable_timing=True)
+        start.record()
+
+        out = self.backend.process_bucket(bucket, no_sum=no_sum)
+
+        end.record()
+        torch.cuda.synchronize()
+        time = start.elapsed_time(end) / 1000
+
+        self._profile_callback(time, 'process bucket time', indices)
+        return out
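CUDA kernel launches are asynchronous, so host-side wall-clock timing around process_bucket would under-measure the GPU work; the event pattern above records markers on the stream and synchronizes before reading the elapsed time. The same pattern in isolation (standard torch.cuda API; elapsed_time returns milliseconds):

    import torch

    def time_on_gpu(fn, *args):
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()               # enqueue start marker on the current stream
        out = fn(*args)              # enqueue the GPU work
        end.record()                 # enqueue end marker
        torch.cuda.synchronize()     # wait until all enqueued work finishes
        return out, start.elapsed_time(end) / 1000   # ms -> seconds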
64 changes: 48 additions & 16 deletions qtensor/contraction_backends/torch.py
@@ -1,9 +1,7 @@
 from qtensor.tools.lazy_import import torch
 import qtree
 import numpy as np
-
-
-
+from qtree import np_framework
 from qtensor.contraction_backends import ContractionBackend
 def qtree2torch_tensor(tensor, data_dict):
     """ Converts qtree tensor to pytorch tensor using data dict"""
@@ -19,21 +17,54 @@ def qtree2torch_tensor(tensor, data_dict):


 class TorchBackend(ContractionBackend):

-    def __init__(self, device = "cpu"):
+    def __init__(self, device = "gpu"):
         self.device = device
+        self.cuda_available = torch.cuda.is_available()
+

     def process_bucket(self, bucket, no_sum=False):
         result_indices = bucket[0].indices
         result_data = bucket[0].data

+        if not isinstance(result_data, torch.Tensor):
+            #print("Encountering: ", type(result_data))
+            if self.device == 'gpu' and torch.cuda.is_available():
+                cuda = torch.device('cuda')
+                result_data = torch.from_numpy(result_data).to(cuda)
+            else:
+                result_data = torch.from_numpy(result_data)
+
         for tensor in bucket[1:]:

             expr = qtree.utils.get_einsum_expr(
                 list(map(int, result_indices)), list(map(int, tensor.indices))
             )

+            '''
+            Objective: change numpy arrays into tensors on the GPU
+            '''
+            if not isinstance(tensor._data, torch.Tensor):
+                if self.device == 'gpu' and torch.cuda.is_available():
+                    cuda = torch.device('cuda')
+                    tensor._data = torch.from_numpy(tensor._data).to(cuda)
+                else:
+                    tensor._data = torch.from_numpy(tensor._data)
+
+            '''
+            Change: the input data's device may not match the backend's device
+            type, so we make it consistent with the backend device type.
+            '''
+            if self.device == 'gpu':
+                if result_data.device != "gpu":
+                    result_data = result_data.to(torch.device('cuda'))
+                if tensor.data.device != "gpu":
+                    tensor._data = tensor._data.to(torch.device("cuda"))
+            else:
+                if result_data.device != "cpu":
+                    result_data = result_data.cpu()
+                if tensor.data.device != "cpu":
+                    tensor._data = tensor._data.cpu()
+
             result_data = torch.einsum(expr, result_data, tensor.data)

# Merge and sort indices and shapes
@@ -70,15 +101,13 @@ def get_sliced_buckets(self, buckets, data_dict, slice_dict):
             # sort tensor dimensions
             transpose_order = np.argsort(list(map(int, tensor.indices)))
             data = data_dict[tensor.data_key]
-            if not isinstance(data, torch.Tensor):
-                if self.device == "cpu":
-                    data = torch.from_numpy(data)
+            if not isinstance(data, torch.Tensor):
+                if self.device == 'gpu' and torch.cuda.is_available():
+                    cuda = torch.device('cuda')
+                    data = torch.from_numpy(data).to(cuda)
                 else:
-                    if self.cuda_available:
-                        cuda = torch.device('cuda')
-                        data = torch.from_numpy(data).to(cuda)
-                    else:
-                        raise Exception("cuda is not available on this machine")
+                    data = torch.from_numpy(data)

             data = data.permute(tuple(transpose_order))
             # transpose indices
             indices_sorted = [tensor.indices[pp]
@@ -107,4 +136,7 @@ def get_sliced_buckets(self, buckets, data_dict, slice_dict):
         return sliced_buckets

     def get_result_data(self, result):
-        return result.data
+        try:
+            return result.data.cpu()
+        except:
+            return result.data
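One detail worth noting in the device-consistency block of process_bucket: torch device types are 'cpu' and 'cuda', never 'gpu', so a comparison like result_data.device != "gpu" is always true and the transfer call runs unconditionally. A sketch of the same normalization keyed off device.type (my own helper, not part of this PR):

    import torch

    def ensure_device(x: torch.Tensor, device: str) -> torch.Tensor:
        # map the backend's 'gpu'/'cpu' naming onto torch's 'cuda'/'cpu'
        target = 'cuda' if device == 'gpu' and torch.cuda.is_available() else 'cpu'
        if x.device.type != target:
            x = x.to(torch.device(target))   # transfer only when needed
        return x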