From 9e4c04abaa60a07b1a2c58f8f4e6687756504c5b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 7 Jul 2024 22:02:11 +0000 Subject: [PATCH] fix edge case when chunk len is 1 and multithread worker num --- encoding/kzg/prover/gpu/multiframe_proof.go | 24 ++++++++++----------- encoding/test/main.go | 11 +++++++--- encoding/utils/gpu_utils/utils.go | 8 +++++++ 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/encoding/kzg/prover/gpu/multiframe_proof.go b/encoding/kzg/prover/gpu/multiframe_proof.go index a1e865332..1edaad67d 100644 --- a/encoding/kzg/prover/gpu/multiframe_proof.go +++ b/encoding/kzg/prover/gpu/multiframe_proof.go @@ -77,6 +77,8 @@ func (p *GpuComputeDevice) ComputeMultiFrameProof(polyFr []fr.Element, numChunks l := chunkLen numPoly := uint64(len(polyFr)) / dimE / chunkLen fmt.Println("numPoly", numPoly) + fmt.Println("dimE", dimE) + fmt.Println("l", l) begin := time.Now() @@ -113,6 +115,16 @@ func (p *GpuComputeDevice) ComputeMultiFrameProof(polyFr []fr.Element, numChunks } preprocessDone := time.Now() + /* + for i := 0; i < int(l*numPoly); i++ { + vec := coeffStore[i] + for j := 0; j < len(vec); j++ { + fmt.Printf("%v ", vec[j].String()) + } + fmt.Println() + } + */ + // Start using GPU p.GpuLock.Lock() defer p.GpuLock.Unlock() @@ -125,18 +137,6 @@ func (p *GpuComputeDevice) ComputeMultiFrameProof(polyFr []fr.Element, numChunks } nttDone := time.Now() - /* - fmt.Println("after fft") - vec := gpu_utils.ConvertScalarFieldsToFrBytes(coeffStoreFft) - for i := 0; i < int(l*numPoly); i++ { - length := int(dimE) * 2 - for j := 0; j < length; j++ { - fmt.Printf("%v ", vec[i*length+j].String()) - } - fmt.Println() - } - */ - // transpose the FFT tranformed matrix coeffStoreFftTranspose, err := Transpose(coeffStoreFft, int(l), int(numPoly), int(dimE)*2) if err != nil { diff --git a/encoding/test/main.go b/encoding/test/main.go index f428adbeb..7e0c058c2 100644 --- a/encoding/test/main.go +++ b/encoding/test/main.go @@ -52,7 +52,7 @@ func readpoints() { func TestKzgRs() { isSmallTest := false - numSymbols := 4096 * 8 + numSymbols := 4096 / 8 // encode parameters numNode := uint64(4096) // 200 numSys := uint64(512) // 180 @@ -72,7 +72,7 @@ func TestKzgRs() { } if isSmallTest { - numSymbols = 4 + numSymbols = 2 numNode = 4 numSys = 2 numPar = numNode - numSys @@ -96,7 +96,7 @@ func TestKzgRs() { // create encoding object p, _ := prover.NewProver(kzgConfig, true) - p.UseGpu = true + p.UseGpu = false params := encoding.EncodingParams{NumChunks: numNode, ChunkLength: uint64(numSymbols) / uint64(numSys)} enc, _ := p.GetKzgEncoder(params) @@ -139,6 +139,11 @@ func TestKzgRs() { log.Fatal("leading coset inconsistency") } + // special case when chunk Len =1, we need to artificially use root doubled + if params.ChunkLength == uint64(1) { + j *= 2 + } + lc := enc.Fs.ExpandedRootsOfUnity[uint64(j)] g2Atn, err := kzg.ReadG2Point(uint64(len(f.Coeffs)), kzgConfig) diff --git a/encoding/utils/gpu_utils/utils.go b/encoding/utils/gpu_utils/utils.go index 5283a5d04..14d7b334f 100644 --- a/encoding/utils/gpu_utils/utils.go +++ b/encoding/utils/gpu_utils/utils.go @@ -85,6 +85,10 @@ func IcicleProjectiveToGnarkAffine(p bn254_icicle.Projective) bn254.G1Affine { func HostSliceIcicleProjectiveToGnarkAffine(ps core.HostSlice[bn254_icicle.Projective], numWorker int) []bn254.G1Affine { output := make([]bn254.G1Affine, len(ps)) + if len(ps) < numWorker { + numWorker = len(ps) + } + var wg sync.WaitGroup interval := int(math.Ceil(float64(len(ps)) / float64(numWorker))) @@ -112,6 +116,10 @@ func HostSliceIcicleProjectiveToGnarkAffine(ps core.HostSlice[bn254_icicle.Proje func ConvertFrToScalarFieldsBytesThread(data []fr.Element, numWorker int) []bn254_icicle.ScalarField { scalars := make([]bn254_icicle.ScalarField, len(data)) + if len(data) < numWorker { + numWorker = len(data) + } + var wg sync.WaitGroup interval := int(math.Ceil(float64(len(data)) / float64(numWorker)))