cuda : supports running on CPU for GGML_USE_CUBLAS=ON build (ggerganov#3946)

* prototyping the idea that supports running on CPU for a GGML_USE_CUBLAS=on build

* doc: add comments to ggml_cublas_loaded()

* fix defined(...)
wsxiaoys authored Nov 7, 2023
1 parent 381efbf commit 46876d2
Showing 3 changed files with 126 additions and 75 deletions.
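
The gist of the change: ggml_init_cublas() no longer aborts when cudaGetDeviceCount() fails; it records the failure instead, and callers can query ggml_cublas_loaded() to decide whether GPU offload is available. A minimal caller sketch (the backend_init() wrapper and its messages are hypothetical, not part of this commit):

#include <stdbool.h>
#include <stdio.h>
#include "ggml-cuda.h"

// Hypothetical application-side helper: probe CUDA once at startup and
// report whether offloading is possible, instead of hard-failing on
// machines without a CUDA device.
static bool backend_init(void) {
    ggml_init_cublas();              // always returns; never aborts after this commit
    if (!ggml_cublas_loaded()) {     // no CUDA device found / cuBLAS failed to load
        fprintf(stderr, "CUDA unavailable, falling back to CPU\n");
        return false;                // keep all tensors on the CPU backend
    }
    return true;                     // GPU offload can proceed as usual
}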
17 changes: 16 additions & 1 deletion ggml-cuda.cu
@@ -5790,6 +5790,11 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
CUDA_CHECK(cudaFree(ptr));
}

static bool g_cublas_loaded = false;

bool ggml_cublas_loaded(void) {
return g_cublas_loaded;
}

void ggml_init_cublas() {
static bool initialized = false;
@@ -5803,7 +5808,12 @@ void ggml_init_cublas() {
CUDA_CHECK(cudaDeviceSynchronize());
#endif

CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
initialized = true;
g_cublas_loaded = false;
return;
}

GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
int64_t total_vram = 0;
#if defined(GGML_CUDA_FORCE_MMQ)
@@ -5851,6 +5861,7 @@ void ggml_init_cublas() {
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));

initialized = true;
g_cublas_loaded = true;
}
}

@@ -7158,6 +7169,8 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
}

bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
if (!g_cublas_loaded) return false;

const int64_t ne10 = src1->ne[0];

const int64_t ne0 = dst->ne[0];
@@ -7843,6 +7856,8 @@ void ggml_cuda_free_scratch() {
}

bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
if (!g_cublas_loaded) return false;

ggml_cuda_func_t func;
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
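
With the early-return guards above, ggml_cuda_can_mul_mat() and ggml_cuda_compute_forward() simply report "not handled" when cuBLAS is not loaded, so the generic graph executor can fall through to its CPU kernels. A simplified sketch of that dispatch pattern (illustrative only, not the exact code in ggml.c):

// Simplified dispatch sketch: if the CUDA path declines an op (for example
// because g_cublas_loaded is false), fall through to the CPU implementation.
static void compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
#ifdef GGML_USE_CUBLAS
    const bool handled_on_gpu = ggml_cuda_compute_forward(params, tensor);
    if (handled_on_gpu) {
        return;                      // op was executed by a CUDA kernel
    }
#endif
    // ... CPU implementation of the op runs here ...
}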
5 changes: 5 additions & 0 deletions ggml-cuda.h
@@ -17,7 +17,12 @@ extern "C" {

#define GGML_CUDA_MAX_DEVICES 16

// Always succeeds. To check whether CUDA is actually loaded, use `ggml_cublas_loaded`.
GGML_API void ggml_init_cublas(void);

// Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
GGML_API bool ggml_cublas_loaded(void);

GGML_API void * ggml_cuda_host_malloc(size_t size);
GGML_API void ggml_cuda_host_free(void * ptr);

