nvml: last fixes for Windows

Note about x86 releases: the 32-bit (x86) nvml.dll is not installed on 64-bit (x64) Windows!
cbuchner1 · Nov 13, 2014 · babaebc · babaebc
1 parent 8d6b809
commit babaebc
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 87 deletions.
diff --git a/api.cpp b/api.cpp
@@ -114,8 +114,6 @@ extern uint32_t rejected_count;
 static void gpustatus(int thr_id)
 {
 	char buf[MYBUFSIZ];
-	float gt;
-	int gp, gf;
 
 	if (thr_id >= 0 && thr_id < gpu_threads) {
 		struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
@@ -125,16 +123,12 @@ static void gpustatus(int thr_id)
 #ifdef USE_WRAPNVML
 		// todo
 		if (1 || cgpu->has_monitoring) {
-			gf = gpu_fanpercent(cgpu);
-			gt = gpu_temp(cgpu);
-			gp = gpu_power(cgpu);
-			// gpu_clock(cgpu);
+			cgpu->gpu_temp = gpu_temp(cgpu);
+			cgpu->gpu_fan = gpu_fanpercent(cgpu);
+			cgpu->gpu_power = gpu_power(cgpu);
+			//cgpu->gpu_clock = gpu_clock(cgpu);
 		}
-		else
 #endif
-		{
-			gt = 0.0;  gf = gp = 0;
-		}
 
 		// todo: can be 0 if set by algo (auto)
 		if (opt_intensity == 0 && opt_work_size) {
@@ -155,7 +149,7 @@ static void gpustatus(int thr_id)
 
 		sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;POWER=%d;KHS=%.2f;"
 			"HWF=%d;I=%d|",
-			thr_id, gt, gf, gp, cgpu->khashes,
+			thr_id, cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_power, cgpu->khashes,
 			cgpu->hw_errors, cgpu->intensity);
 
 		strcat(buffer, buf);

diff --git a/cpu-miner.c b/cpu-miner.c
@@ -56,21 +56,15 @@ BOOL WINAPI ConsoleHandler(DWORD);
 #define HEAVYCOIN_BLKHDR_SZ		84
 #define MNR_BLKHDR_SZ 80
 
-// from cuda.cu
-#ifdef __cplusplus
-extern "C"
-{
-#endif
+// from cuda.cpp
 int cuda_num_devices();
 void cuda_devicenames();
 void cuda_devicereset();
 int cuda_finddevice(char *name);
-#ifdef __cplusplus
-}
-#endif
 
 #ifdef USE_WRAPNVML
 #include "nvml.h"
+wrap_nvml_handle *hnvml = NULL;
 #endif
 
 #ifdef __linux /* Linux specific policy and affinity management */
@@ -247,10 +241,6 @@ uint32_t opt_work_size = 0; /* default */
 char *opt_api_allow = "127.0.0.1"; /* 0.0.0.0 for all ips */
 int opt_api_listen = 4068; /* 0 to disable */
 
-#ifdef USE_WRAPNVML
-wrap_nvml_handle *nvmlh = NULL;
-#endif
-
 #ifdef HAVE_GETOPT_LONG
 #include <getopt.h>
 #else
@@ -429,8 +419,8 @@ void proper_exit(int reason)
 	timeEndPeriod(1); // else never executed
 #endif
 #ifdef USE_WRAPNVML
-	if (nvmlh)
-		wrap_nvml_destroy(nvmlh);
+	if (hnvml)
+		wrap_nvml_destroy(hnvml);
 #endif
 	exit(reason);
 }
@@ -2140,8 +2130,8 @@ int main(int argc, char *argv[])
 	}
 
 #ifdef USE_WRAPNVML
-	nvmlh = wrap_nvml_create();
-	if (nvmlh) {
+	hnvml = wrap_nvml_create();
+	if (hnvml) {
 		// todo: link threads info gpu
 		applog(LOG_INFO, "NVML GPU monitoring enabled.");
 	} else {

diff --git a/cuda.cpp b/cuda.cpp
@@ -27,7 +27,7 @@ extern int device_map[8];
 extern int device_sm[8];
 
 // CUDA Devices on the System
-extern "C" int cuda_num_devices()
+int cuda_num_devices()
 {
 	int version;
 	cudaError_t err = cudaDriverGetVersion(&version);
@@ -54,7 +54,7 @@ extern "C" int cuda_num_devices()
 	return GPU_N;
 }
 
-extern "C" void cuda_devicenames()
+void cuda_devicenames()
 {
 	cudaError_t err;
 	int GPU_N;
@@ -76,7 +76,7 @@ extern "C" void cuda_devicenames()
 }
 
 // Can't be called directly in cpu-miner.c
-extern "C" void cuda_devicereset()
+void cuda_devicereset()
 {
 	cudaDeviceReset();
 }
@@ -103,7 +103,7 @@ static bool substringsearch(const char *haystack, const char *needle, int &match
 }
 
 // CUDA Gerät nach Namen finden (gibt Geräte-Index zurück oder -1)
-extern "C" int cuda_finddevice(char *name)
+int cuda_finddevice(char *name)
 {
 	int num = cuda_num_devices();
 	int match = 0;

diff --git a/nvml.cpp b/nvml.cpp
@@ -19,7 +19,6 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <errno.h>
 #ifndef _MSC_VER
 #include <libgen.h>
 #endif
@@ -34,7 +33,12 @@
 #if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
 	#include <windows.h>
 	static void *wrap_dlopen(const char *filename) {
-		return (void *)LoadLibrary(filename);
+		HMODULE h = LoadLibraryA(filename); /* ANSI variant: filename is a narrow char string */
+		if (!h && opt_debug) { /* only report the load failure when debug logging is on */
+			applog(LOG_DEBUG, "dlopen(%lu): failed to load %s",
+				(unsigned long) GetLastError(), filename); /* DWORD is unsigned long */
+		}
+		return (void*)h; /* NULL when the library could not be loaded */
 	}
 	static void *wrap_dlsym(void *h, const char *sym) {
 		return (void *)GetProcAddress((HINSTANCE)h, sym);
@@ -46,9 +50,16 @@
 #else
 	/* assume we can use dlopen itself... */
 	#include <dlfcn.h>
+	#include <errno.h>
 	static void *wrap_dlopen(const char *filename) {
-		return dlopen(filename, RTLD_NOW);
+		void *h = dlopen(filename, RTLD_NOW); /* keep as void*: C++ has no implicit void* -> uintptr_t */
+		if (!h && opt_debug) { /* dlopen reports failures via dlerror(), not errno */
+			const char *why = dlerror();
+			applog(LOG_DEBUG, "dlopen: failed to load %s (%s)", filename, why ? why : "unknown");
+		}
+		return h; /* NULL when the library could not be loaded */
 	}
+
 	static void *wrap_dlsym(void *h, const char *sym) {
 		return dlsym(h, sym);
 	}
@@ -66,51 +77,27 @@ wrap_nvml_handle * wrap_nvml_create()
 	int i=0;
 	wrap_nvml_handle *nvmlh = NULL;
 
-	/*
-	 * We use hard-coded library installation locations for the time being...
-	 * No idea where or if libnvidia-ml.so is installed on MacOS X, a
-	 * deep scouring of the filesystem on one of the Mac CUDA build boxes
-	 * I used turned up nothing, so for now it's not going to work on OSX.
-	 */
-#if defined(_WIN64)
-	/* 64-bit Windows */
-#define  libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
-#elif defined(_WIN32) || defined(_MSC_VER)
-	/* 32-bit Windows */
-#define  libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
-#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
-	/* 32-bit linux assumed */
-#define  libnvidia_ml "/usr/lib32/libnvidia-ml.so"
-#elif defined(__linux)
-	/* 64-bit linux assumed */
-#define  libnvidia_ml "/usr/lib/libnvidia-ml.so"
+#if defined(WIN32)
+	/* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */
+#define  libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll"
 #else
-#error "Unrecognized platform: need NVML DLL path for this platform..."
+	/* linux assumed */
+#define  libnvidia_ml "libnvidia-ml.so"
 #endif
 
-#if WIN32
 	char tmp[512];
-	ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp));
+#if WIN32
+	ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp));
 #else
-	char tmp[512] = libnvidia_ml;
+	strcpy(tmp, libnvidia_ml);
 #endif
 
 	void *nvml_dll = wrap_dlopen(tmp);
 	if (nvml_dll == NULL) {
 #ifdef WIN32
-		char lib[] = "nvml.dll";
-#else
-		char lib[64] = { '\0' };
-		snprintf(lib, sizeof(lib), "%s", basename(tmp));
-		/* try dlopen without path, here /usr/lib/nvidia-340/libnvidia-ml.so */
+		nvml_dll = wrap_dlopen("nvml.dll");
+		if (nvml_dll == NULL)
 #endif
-		nvml_dll = wrap_dlopen(lib);
-		if (opt_debug)
-			applog(LOG_DEBUG, "dlopen: %s=%p", lib, nvml_dll);
-	}
-	if (nvml_dll == NULL) {
-		if (opt_debug)
-			applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, tmp);
 		return NULL;
 	}
 
@@ -120,9 +107,10 @@ wrap_nvml_handle * wrap_nvml_create()
 
 	nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
-	if (!nvmlh->nvmlInit)
+	if (!nvmlh->nvmlInit) {
 		nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
 			wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
+	}
 	nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
 	nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
@@ -153,11 +141,10 @@ wrap_nvml_handle * wrap_nvml_create()
 			nvmlh->nvmlDeviceGetPciInfo == NULL ||
 			nvmlh->nvmlDeviceGetName == NULL ||
 			nvmlh->nvmlDeviceGetTemperature == NULL ||
-			nvmlh->nvmlDeviceGetFanSpeed == NULL ||
-			nvmlh->nvmlDeviceGetPowerUsage == NULL)
+			nvmlh->nvmlDeviceGetFanSpeed == NULL)
 	{
 		if (opt_debug)
-			applog(LOG_DEBUG, "Failed to obtain all required NVML function pointers");
+			applog(LOG_DEBUG, "Failed to obtain required NVML function pointers");
 		wrap_dlclose(nvmlh->nvml_dll);
 		free(nvmlh);
 		return NULL;
@@ -342,24 +329,26 @@ int wrap_nvml_destroy(wrap_nvml_handle *nvmlh)
 
 /* api functions */
 
-extern wrap_nvml_handle *nvmlh;
-extern int device_map[8];
+extern "C" {
+	extern wrap_nvml_handle *hnvml; /* declaration only: the handle is defined in cpu-miner.c */
+	extern int device_map[8]; /* declaration only: without 'extern' this would define a second copy */
+}
 
 unsigned int gpu_fanpercent(struct cgpu_info *gpu)
 {
 	unsigned int pct = 0;
-	if (nvmlh) {
-		wrap_nvml_get_fanpcnt(nvmlh, device_map[gpu->thr_id], &pct);
+	if (hnvml) {
+		wrap_nvml_get_fanpcnt(hnvml, device_map[gpu->thr_id], &pct);
 	}
 	return pct;
 }
 
 double gpu_temp(struct cgpu_info *gpu)
 {
 	double tc = 0.0;
-	if (nvmlh) {
+	if (hnvml) {
 		unsigned int tmp = 0;
-		wrap_nvml_get_tempC(nvmlh, device_map[gpu->thr_id], &tmp);
+		wrap_nvml_get_tempC(hnvml, device_map[gpu->thr_id], &tmp);
 		tc = (double) tmp;
 	}
 	return tc;
@@ -368,27 +357,26 @@ double gpu_temp(struct cgpu_info *gpu)
 unsigned int gpu_clock(struct cgpu_info *gpu)
 {
 	unsigned int freq = 0;
-	if (nvmlh) {
-		wrap_nvml_get_clock(nvmlh, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq);
+	if (hnvml) {
+		wrap_nvml_get_clock(hnvml, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq);
 	}
 	return freq;
 }
 
 unsigned int gpu_power(struct cgpu_info *gpu)
 {
 	unsigned int mw = 0;
-	if (nvmlh) {
-		wrap_nvml_get_power_usage(nvmlh, device_map[gpu->thr_id], &mw);
+	if (hnvml) {
+		wrap_nvml_get_power_usage(hnvml, device_map[gpu->thr_id], &mw);
 	}
 	return mw;
 }
 
 int gpu_pstate(struct cgpu_info *gpu)
 {
 	int pstate = 0;
-	if (nvmlh) {
-		wrap_nvml_get_pstate(nvmlh, device_map[gpu->thr_id], &pstate);
-		//gpu->gpu_pstate = pstate;
+	if (hnvml) {
+		wrap_nvml_get_pstate(hnvml, device_map[gpu->thr_id], &pstate);
 	}
 	return pstate;
 }
@@ -433,7 +421,7 @@ int gpu_pstate(struct cgpu_info *gpu)
 *	nvmlDeviceGetFanSpeed
 	nvmlDeviceGetGpuOperationMode
 	nvmlDeviceGetHandleByIndex
-	nvmlDeviceGetHandleByIndex_v2
+*	nvmlDeviceGetHandleByIndex_v2
 	nvmlDeviceGetHandleByPciBusId
 	nvmlDeviceGetHandleByPciBusId_v2
 	nvmlDeviceGetHandleBySerial
@@ -450,7 +438,7 @@ int gpu_pstate(struct cgpu_info *gpu)
 	nvmlDeviceGetMinorNumber
 	nvmlDeviceGetMultiGpuBoard
 	nvmlDeviceGetName
-	nvmlDeviceGetPciInfo
+*	nvmlDeviceGetPciInfo
 	nvmlDeviceGetPciInfo_v2
 *	nvmlDeviceGetPerformanceState
 	nvmlDeviceGetPersistenceMode