Skip to content

Commit

Permalink
metal : round up to 16 to fix MTLDebugComputeCommandEncoder assertion
Browse files Browse the repository at this point in the history
  • Loading branch information
psugihara authored Nov 3, 2023
1 parent 5ba3746 commit d9b33fe
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,7 @@ void ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
[encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
- [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
+ [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0];

[encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
} break;
Expand Down Expand Up @@ -1348,7 +1348,7 @@ void ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
[encoder setBytes:&eps length:sizeof( float) atIndex:4];
- [encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0];
+ [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0];

const int64_t nrows = ggml_nrows(src0);

Expand Down

0 comments on commit d9b33fe

Please sign in to comment.