Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Removal of old, decrepit bitalign patching code; fix for master not compiling after WhirlpoolX merge" #440

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ SUBDIRS = lib submodules ccan sph

bin_PROGRAMS = sgminer

sgminer_CPPFLAGS = $(PTHREAD_FLAGS) -std=gnu99 $(JANSSON_CPPFLAGS)
sgminer_CPPFLAGS = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_CPPFLAGS)
sgminer_LDFLAGS = $(PTHREAD_FLAGS)
sgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
@OPENCL_LIBS@ @NCURSES_LIBS@ @PDCURSES_LIBS@ @WS2_LIBS@ \
Expand Down Expand Up @@ -44,6 +44,7 @@ sgminer_SOURCES += pool.c pool.h
sgminer_SOURCES += algorithm.c algorithm.h
sgminer_SOURCES += config_parser.c config_parser.h
sgminer_SOURCES += events.c events.h
sgminer_SOURCES += ocl/patch_kernel.c ocl/patch_kernel.h
sgminer_SOURCES += ocl/build_kernel.c ocl/build_kernel.h
sgminer_SOURCES += ocl/binary_kernel.c ocl/binary_kernel.h

Expand Down
38 changes: 34 additions & 4 deletions ocl.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,21 @@ static float get_opencl_version(cl_device_id device)
return version;
}

/*
 * Report whether the given OpenCL device lists the "cl_amd_media_ops"
 * extension in its CL_DEVICE_EXTENSIONS string.
 *
 * device - pointer to the device id to query
 *
 * Returns true when the extension name occurs in the extension string,
 * false when it does not or when the clGetDeviceInfo query fails.
 */
static bool get_opencl_bit_align_support(cl_device_id *device)
{
	static const char media_ops_ext[] = "cl_amd_media_ops";
	char ext_list[1024];
	cl_int rc;

	rc = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, sizeof(ext_list), ext_list, NULL);

	/* A failed query is reported the same way as a missing extension. */
	if (rc != CL_SUCCESS)
		return false;

	return strstr(ext_list, media_ops_ext) != NULL;
}

static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties)
{
cl_int status;
Expand Down Expand Up @@ -247,6 +262,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL;
}

clState->hasBitAlign = get_opencl_bit_align_support(&devices[gpu]);

status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
Expand Down Expand Up @@ -527,7 +544,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg

build_data->kernel_path = (*opt_kernel_path) ? opt_kernel_path : NULL;
build_data->work_size = clState->wsize;
build_data->has_bit_align = clState->hasBitAlign;
build_data->opencl_version = get_opencl_version(devices[gpu]);
build_data->patch_bfi = needs_bfi_patch(build_data);

strcpy(build_data->binary_filename, filename);
build_data->binary_filename[strlen(filename) - 3] = 0x00; // And one NULL terminator, cutting off the .cl suffix.
Expand All @@ -553,13 +572,23 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL;
}

// If it doesn't work, oh well, build it again next run
save_opencl_kernel(build_data, clState->program);
if (save_opencl_kernel(build_data, clState->program)) {
/* Program needs to be rebuilt, because the binary was patched */
if (build_data->patch_bfi) {
clReleaseProgram(clState->program);
clState->program = load_opencl_binary_kernel(build_data);
}
}
else {
if (build_data->patch_bfi)
quit(1, "Could not save kernel to file, but it is necessary to apply BFI patch");
}
}

// Load kernels
applog(LOG_NOTICE, "Initialising kernel %s with nfactor %d, n %d",
filename, algorithm->nfactor, algorithm->n);
applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d",
filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un",
algorithm->nfactor, algorithm->n);

/* get a kernel object handle for a kernel with the given name */
clState->kernel = clCreateKernel(clState->program, "search", &status);
Expand All @@ -568,6 +597,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL;
}


clState->n_extra_kernels = algorithm->n_extra_kernels;
if (clState->n_extra_kernels > 0) {
unsigned int i;
Expand Down
1 change: 1 addition & 0 deletions ocl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef struct __clState {
cl_mem MidstateBuf;
cl_mem padbuffer8;
unsigned char cldata[80];
bool hasBitAlign;
bool goffset;
cl_uint vwidth;
size_t max_work_size;
Expand Down
46 changes: 44 additions & 2 deletions ocl/build_kernel.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <stdio.h>
#include "build_kernel.h"
#include "patch_kernel.h"
#include "miner.h"

static char *file_contents(const char *filename, int *length)
Expand Down Expand Up @@ -51,7 +52,6 @@ static char *file_contents(const char *filename, int *length)
return (char*)buffer;
}

// This should NOT be in here! -- Wolf9466
void set_base_compiler_options(build_kernel_data *data)
{
char buf[255];
Expand All @@ -61,17 +61,51 @@ void set_base_compiler_options(build_kernel_data *data)

sprintf(buf, "w%dl%d", (int)data->work_size, (int)sizeof(long));
strcat(data->binary_filename, buf);

if (data->has_bit_align) {
strcat(data->compiler_options, " -D BITALIGN");
applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
} else
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");

if (data->kernel_path) {
strcat(data->compiler_options, " -I \"");
strcat(data->compiler_options, data->kernel_path);
strcat(data->compiler_options, "\"");
}

if (data->patch_bfi) {
strcat(data->compiler_options, " -D BFI_INT");
applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
} else
applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");

if (data->opencl_version < 1.1)
strcat(data->compiler_options, " -D OCL1");
}

/*
 * Decide whether the kernel built for this device needs the BFI_INT
 * binary patch.
 *
 * The answer is true only when all of the following hold:
 *   - data->has_bit_align is set,
 *   - data->opencl_version is below 1.2,
 *   - data->platform contains one of the names in the table below
 *     (these look like AMD GPU codenames -- confirm against the caller
 *     that fills in data->platform).
 */
bool needs_bfi_patch(build_kernel_data *data)
{
	static const char *const patch_targets[] = {
		"Cedar", "Redwood", "Juniper", "Cypress", "Hemlock",
		"Caicos", "Turks", "Barts", "Cayman", "Antilles",
		"Wrestler", "Zacate", "WinterPark"
	};
	size_t idx;

	if (!data->has_bit_align)
		return false;

	if (!(data->opencl_version < 1.2))
		return false;

	/* First substring match wins; order of the table is irrelevant. */
	for (idx = 0; idx < sizeof(patch_targets) / sizeof(patch_targets[0]); idx++) {
		if (strstr(data->platform, patch_targets[idx]))
			return true;
	}

	return false;
}

cl_program build_opencl_kernel(build_kernel_data *data, const char *filename)
{
int pl;
Expand Down Expand Up @@ -164,10 +198,18 @@ bool save_opencl_kernel(build_kernel_data *data, cl_program program)
goto out;
}

/* Patch the kernel if the hardware supports BFI_INT but it needs to
* be hacked in */
if (data->patch_bfi) {
if (kernel_bfi_patch(binaries[slot], binary_sizes[slot]) != 0) {
quit(1, "Could not patch BFI_INT, please report this issue.");
}
}

/* Save the binary to be loaded next time */
binaryfile = fopen(data->binary_filename, "wb");
if (!binaryfile) {
/* Not fatal, just means we build it again next time */
/* Not fatal, just means we build it again next time, unless BFI patch is needed */
applog(LOG_DEBUG, "Unable to create file %s", data->binary_filename);
goto out;
} else {
Expand Down
3 changes: 3 additions & 0 deletions ocl/build_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ typedef struct _build_kernel_data {
char sgminer_path[255];
const char *kernel_path;
size_t work_size;
bool has_bit_align;
bool patch_bfi;
float opencl_version;
} build_kernel_data;

bool needs_bfi_patch(build_kernel_data *data);
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename);
bool save_opencl_kernel(build_kernel_data *data, cl_program program);
void set_base_compiler_options(build_kernel_data *data);
Expand Down
97 changes: 97 additions & 0 deletions ocl/patch_kernel.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "patch_kernel.h"
#include "logging.h"
#include <string.h>
#include <stdint.h>

/*
 * Search the next *remaining bytes starting at *area for the byte
 * sequence of the NUL-terminated string marker (terminator excluded).
 *
 * On a hit, *area is moved to the start of the match, *remaining is
 * reduced by the number of bytes skipped, and 1 is returned.
 * On a miss, both are left untouched, a debug message is logged and 0
 * is returned.
 */
static int advance(char **area, unsigned *remaining, const char *marker)
{
	size_t marker_len = strlen(marker);
	char *hit = (char *)memmem(*area, *remaining, marker, marker_len);

	if (hit) {
		*remaining -= hit - *area;
		*area = hit;
		return 1;
	}

	applog(LOG_DEBUG, "Marker \"%s\" not found", marker);
	return 0;
}

#define OP3_INST_BFE_UINT 4ULL
#define OP3_INST_BFE_INT 5ULL
#define OP3_INST_BFI_INT 6ULL
#define OP3_INST_BIT_ALIGN_INT 12ULL
#define OP3_INST_BYTE_ALIGN_INT 13ULL

/*
 * Walk a region of 64-bit instruction words and rewrite every matching
 * BYTE_ALIGN_INT OP3 instruction into BFI_INT, in place.
 *
 * w         - start of the region to scan; it does not need to be
 *             8-byte aligned, words are accessed through memcpy
 * remaining - size of the region in bytes; a trailing fragment shorter
 *             than one 64-bit word is left untouched
 *
 * A word is only considered when its clamp, dest_rel, s2_neg, s2_rel and
 * pred_sel fields are all zero. BFE_INT and BFE_UINT matches are merely
 * counted for the debug log; only BYTE_ALIGN_INT words are modified.
 */
static void patch_opcodes(char *w, unsigned remaining)
{
	int patched = 0;
	int count_bfe_int = 0;
	int count_bfe_uint = 0;
	int count_byte_align = 0;

	/* Only touch a word when all 8 of its bytes lie inside the region. */
	while (remaining >= sizeof(uint64_t)) {
		uint64_t opcode;
		unsigned int clamp, dest_rel, alu_inst, s2_neg, s2_rel, pred_sel;

		/* memcpy avoids misaligned and type-punned access to the buffer. */
		memcpy(&opcode, w, sizeof(opcode));

		clamp    = (unsigned int)((opcode >> (32 + 31)) & 0x1);
		dest_rel = (unsigned int)((opcode >> (32 + 28)) & 0x1);
		alu_inst = (unsigned int)((opcode >> (32 + 13)) & 0x1f);
		s2_neg   = (unsigned int)((opcode >> (32 + 12)) & 0x1);
		s2_rel   = (unsigned int)((opcode >> (32 + 9)) & 0x1);
		pred_sel = (unsigned int)((opcode >> 29) & 0x3);

		if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) {
			if (alu_inst == OP3_INST_BFE_INT) {
				count_bfe_int++;
			} else if (alu_inst == OP3_INST_BFE_UINT) {
				count_bfe_uint++;
			} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
				count_byte_align++;
				/* Clear the 5-bit ALU_INST field, then set BFI_INT. */
				opcode &= 0xfffc1fffffffffffULL;
				opcode |= OP3_INST_BFI_INT << (32 + 13);
				memcpy(w, &opcode, sizeof(opcode));
				patched++;
			}
		}

		w += sizeof(uint64_t);
		remaining -= (unsigned)sizeof(uint64_t);
	}

	applog(LOG_DEBUG, "Potential OP3 instructions identified: "
		"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN",
		count_bfe_int, count_bfe_uint, count_byte_align);
	applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
}

/*
 * Patch a compiled kernel binary in place so that BYTE_ALIGN_INT
 * instructions in its program section become BFI_INT.
 *
 * binary      - the binary image; modified in place
 * binary_size - size of the image in bytes
 *
 * Returns true once the opcode patching pass has run. Returns false when
 * a required ".text" or "ELF" marker is missing, or when the offsets
 * read from the image do not fit inside it (nothing is patched then).
 */
bool kernel_bfi_patch(char *binary, unsigned binary_size)
{
	/* Fixed offsets, relative to the ".text" marker, of the two 4-byte
	 * fields holding the program's position and length. */
	enum { START_FIELD_OFFSET = 285, LENGTH_FIELD_OFFSET = 289, FIELD_SIZE = 4 };

	unsigned remaining = binary_size;
	char *w = binary;
	unsigned int start, length;

	/* Find 2nd incidence of .text, and copy the program's
	 * position and length at a fixed offset from that. Then go
	 * back and find the 2nd incidence of \x7fELF (rewind by one
	 * from ELF) and then patch the opcodes */
	if (!advance(&w, &remaining, ".text"))
		return false;
	w++; remaining--;
	if (!advance(&w, &remaining, ".text")) {
		/* 32 bit builds only one ELF */
		w--; remaining++;
	}

	/* Both fields must lie inside the image before they are read. */
	if (remaining < LENGTH_FIELD_OFFSET + FIELD_SIZE) {
		applog(LOG_DEBUG, "Binary too short to hold program position and length");
		return false;
	}
	memcpy(&start, w + START_FIELD_OFFSET, FIELD_SIZE);
	memcpy(&length, w + LENGTH_FIELD_OFFSET, FIELD_SIZE);

	w = binary; remaining = binary_size;
	if (!advance(&w, &remaining, "ELF"))
		return false;
	w++; remaining--;
	if (!advance(&w, &remaining, "ELF")) {
		/* 32 bit builds only one ELF */
		w--; remaining++;
	}

	/* Stepping back onto the byte before "ELF" requires that byte to exist. */
	if (w == binary) {
		applog(LOG_DEBUG, "\"ELF\" marker found at start of binary, no preceding byte");
		return false;
	}
	w--; remaining++;

	/* The program section must lie entirely inside what is left. */
	if (start > remaining) {
		applog(LOG_DEBUG, "Program position %u exceeds %u remaining bytes",
			start, remaining);
		return false;
	}
	w += start; remaining -= start;
	if (length > remaining) {
		applog(LOG_DEBUG, "Program length %u exceeds %u remaining bytes",
			length, remaining);
		return false;
	}

	applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching",
		(void *)w, remaining);
	patch_opcodes(w, length);

	return true;
}
10 changes: 10 additions & 0 deletions ocl/patch_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Interface for patching BFI_INT instructions into a compiled kernel binary. */
#ifndef PATCH_KERNEL_H
#define PATCH_KERNEL_H

#include <stdbool.h>

/*
 * Patch the kernel binary held in memory, in place.
 *
 * binary      - start of the binary image; its contents may be modified
 * binary_size - size of the image in bytes
 *
 * Returns true when the patching pass has run, false when the binary
 * could not be parsed (for example, a required ".text" or "ELF" marker
 * is missing).
 */
bool kernel_bfi_patch(char *binary, unsigned binary_size);

#endif /* PATCH_KERNEL_H */