{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":3084616,"defaultBranch":"master","name":"GPTL","ownerLogin":"jmrosinski","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2012-01-01T23:13:20.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/1297902?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1684205799.203277","currentOid":""},"activityList":{"items":[{"before":"6f37714b1a589bd045024b8586a506c040f07eac","after":"7782b8b448e61588da1845782f26613a038f2361","ref":"refs/heads/master","pushedAt":"2024-05-03T17:29:52.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jmrosinski","name":"Jim Rosinski","path":"/jmrosinski","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1297902?s=80&v=4"},"commit":{"message":"do-nothing test commit","shortMessageHtmlLink":"do-nothing test commit"}},{"before":"dbf170bb0b467ce1071b928e19695cc09a6690c0","after":"6d0f416952aacf3a33563b8de7e68dabfe0eb56e","ref":"refs/heads/cuda_acc_autoconf","pushedAt":"2023-11-30T20:24:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jmrosinski","name":"Jim Rosinski","path":"/jmrosinski","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1297902?s=80&v=4"},"commit":{"message":"o -Mcuda=cc<$CCAB> changed to -cuda per warnings from latest compiler\no Added cuda/gptl_ompacc.h\n - But there are no tests for it as yet because nvc, nvfortran say compute\n capability at least cc70 is required for OMP on device to work. The gpu\n on my machine is only cc61","shortMessageHtmlLink":"o -Mcuda=cc<$CCAB> changed to -cuda per warnings from latest compiler"}},{"before":"0082e807ef58848e66242c19c48b65609328c6c8","after":"dbf170bb0b467ce1071b928e19695cc09a6690c0","ref":"refs/heads/cuda_acc_autoconf","pushedAt":"2023-11-21T00:52:35.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jmrosinski","name":"Jim Rosinski","path":"/jmrosinski","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1297902?s=80&v=4"},"commit":{"message":"o Forgot to git add verify_results.F90 prv commit","shortMessageHtmlLink":"o Forgot to git add verify_results.F90 prv commit"}},{"before":"f02b6630de80c227136fc84acb3ac4fd3b6789ea","after":"0082e807ef58848e66242c19c48b65609328c6c8","ref":"refs/heads/cuda_acc_autoconf","pushedAt":"2023-11-21T00:49:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jmrosinski","name":"Jim Rosinski","path":"/jmrosinski","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1297902?s=80&v=4"},"commit":{"message":"o Under cuda12.3: Fix for \"free\" failing when run on arrays cudaMalloc'd on host\n - Merged gptl_host.cu into gptl_device.cu & renamed gptl_host_device.cu\n - Enabled removing \"GPTL\" prefix from some routine names\no Only compile and use *mutex routines when PRINTNEG is defined\no Add verify_results to facctests","shortMessageHtmlLink":"o Under cuda12.3: Fix for \"free\" failing when run on arrays cudaMallo…"}},{"before":"5ceafcb2f89dbfef9dded11b5e90f1e5bc7f5481","after":"e8256bf3ebaf8e947fffc1e52533b0a721137d4b","ref":"refs/heads/cuda_restructure","pushedAt":"2023-05-19T18:32:09.662Z","pushType":"push","commitsCount":1,"pusher":{"login":"jmrosinski","name":"Jim Rosinski","path":"/jmrosinski","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1297902?s=80&v=4"},"commit":{"message":"o Change maxtimers to user-specified number of timers (one less than total\n due to \"phantom\" GPTL_ROOT\no Move constant memory settings to 
2023-05-17 02:54 UTC, push to cuda_restructure (commit 5ceafcb):
    o Compiles and works with NIM. Similar performance to the branch it was
      branched from.
    o TBD: __constant__ doesn't work, probably because the "symbol" needs to be
      visible. Move to api.cu?
    o Maybe got the "inline" issue addressed, but didn't see any improvement in
      performance.
    o Consolidate and rename some of the namespaces? Namespace pollution is still
      possible as currently configured.

2023-05-16 02:56 UTC, branch cuda_restructure created (at be82392):
    o Interim commit of a branch which splits files into those related to the API
      vs. other internal functions.

2023-05-12 22:02 UTC, push to cuda_acc_autoconf (commit f02b663):
    o Added cudaGetLastError() tests after kernel calls.

2023-05-12 19:40 UTC, push to cuda_acc_autoconf (commit 93f6fed):
    o Test toomanytimers.c needed a #include "../config.h" to work properly
    o Minor other changes
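The cudaGetLastError() checks added in the 2023-05-12 entry follow standard CUDA practice: a failed kernel launch is otherwise silent, and errors that occur during execution only surface at the next synchronizing call. A minimal, self-contained sketch of that check (the kernel name is illustrative):

```cuda
#include <cstdio>
#include <cuda_runtime.h>

__global__ void dummy_kernel(void) { }

int main(void)
{
  dummy_kernel<<<1, 1>>>();

  // Catch launch-time errors (bad configuration, missing device, etc.)
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  // Catch errors that occur while the kernel executes
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(err));
    return 1;
  }
  return 0;
}
```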
2023-05-10 20:40 UTC, push to cuda_acc_autoconf (commit a2314c7):
    o Remove all source and builds for tests which are not part of "make check"
      (cuda/tests, cacctests, facctests). They were moved to testgit/gptl_gpu.
      Those builds are now much easier to understand since they use simple
      "Makefile"s, not Makefile.am

2023-05-09 21:38 UTC, push to cuda_acc_autoconf (commit 0c3f568):
    o Added configure-time --enable-constantmem (default true) to enable use of
      constant memory. Gives a good speedup, so this should be enabled if the
      compute capability allows it.
    o Added "global_retval" to various __global__ routines to allow percolation
      of return status from the GPU up to the CPU
      - Added cuda/tests/verify_init_final.cu to test percolation of GPU return
        codes up to the CPU. So far only initialize/finalize have this.
    o Minor changes:
      - Made update_stats_gpu a void return.
      - Removed unneeded passing of warpsize to initialize_gpu

2023-05-03 17:18 UTC, push to cuda_acc_autoconf (commit 8c5ec10):
    o Add comment re: how helper_cuda.h can assist in getting cores info.

2023-05-03 02:05 UTC, branch cuda_acc_autoconf_placebo created (at 6cdc3dd):
    o Works with NIM

2023-05-02 17:21 UTC, push to cuda_acc_autoconf (commit 719f74d):
    o runconfigure: Add setting "prefix" to the list of settings
    o toomanytimers.c: Forgot to git add this in the previous commit
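The global_retval mechanism in the 2023-05-09 entry works around the fact that __global__ kernels cannot return a value: status is written to memory the host can read after the launch. A minimal sketch of that idea using managed memory; the kernel name and error code are illustrative, and this is not GPTL's actual implementation:

```cuda
#include <cstdio>
#include <cuda_runtime.h>

// Managed status word readable from both host and device. The name
// global_retval mirrors the commit message; everything else is illustrative.
__managed__ int global_retval = 0;

__global__ void init_kernel(int maxtimers)
{
  // A kernel cannot return a value, so record failure in the shared status word.
  if (maxtimers < 1)
    global_retval = -1;   // percolate an error code up to the CPU
}

int main(void)
{
  init_kernel<<<1, 1>>>(0);
  if (cudaDeviceSynchronize() != cudaSuccess)
    return 1;

  if (global_retval != 0)
    fprintf(stderr, "GPU-side initialization reported error %d\n", global_retval);
  return global_retval == 0 ? 0 : 1;
}
```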
2023-05-02 02:47 UTC, push to cuda_acc_autoconf (commit 2fc6b28):
    o gptl_device.cu:
      - Speedups by making maxtimers, warpsize, maxwarps __constant__
      - Some speedup by simplifying get_warp_num()
      - Bugfix for an off-by-one error: out-of-bounds memory would be written to
        when requesting a new timer beyond allocated bounds (maxtimers changed to
        maxtimers-1). Also, GPTLfill_gpustats missed a final timer.
      - Add global_retval to managed memory, which allows __global__ routines to
        return status info and lets the calling routine on the CPU provide more
        info to the user. Useful mainly on <<<1,1>>> launches, but e.g.
        GPTLget_overhead_gpu() now handles multiple blocks and tests are passing.
      - Use the util_device.cu GPTLerror* routines to avoid a potentially huge
        printout on SM changes between start and stop.
      - Changed GPTLreset_gpu to zero only a SINGLE timer, and created
        GPTLreset_all_gpu to zero ALL existing timers.
      - Bugfix for printing zeros for some overhead calcs: changing the type of
        the "ohd" variables from long long to float results in a non-zero
        printout of the cost of my_strlen and STRMATCH
      - Allow GPTLget_overhead_gpu() to be called by more than one thread. Now
        set up to be called on a single SM for all blocks on the SM. The overhead
        estimate improved dramatically, but it looks like the earlier "scalefac"
        for revising the estimate was working surprisingly well.
    o cacctests/toomanytimers.c and Makefile.am:
      Add "toomanytimers" to cacctests to verify correct library behavior when
      more than maxtimers-1 timers are requested.
    o gptl_host.cu: use global_retval to give more useful info to the user.
    o print_gpustats.cu:
      - Use the computed GPTLwarpsize rather than the hard-wired #define WARPSIZE 32
      - For overhead calcs, compute and average results across an SM rather than
        on just a single thread. This provides a much better overhead estimate,
        so "scalefac" is much closer to 1.
    o gptl.c:
      - Make use of the added info provided by the new variable "global_retval"
        to inform the user
      - Compute "warps_per_gpu" for internal use by GPTL routines.
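Replacing a hard-wired #define WARPSIZE 32 with a computed warp size, as the print_gpustats.cu item above describes, can be done by querying the device properties at run time. A minimal sketch, assuming device 0; the warps_per_gpu formula is illustrative, not necessarily how GPTL computes it:

```cuda
#include <cstdio>
#include <cuda_runtime.h>

int main(void)
{
  cudaDeviceProp prop;
  if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess) {
    fprintf(stderr, "cudaGetDeviceProperties failed\n");
    return 1;
  }

  // Computed at run time rather than assumed via "#define WARPSIZE 32"
  int warpsize = prop.warpSize;

  // Illustrative estimate of warps resident across the whole GPU
  int warps_per_gpu = prop.multiProcessorCount *
                      (prop.maxThreadsPerMultiProcessor / warpsize);

  printf("warpsize=%d  SMs=%d  warps_per_gpu (illustrative)=%d\n",
         warpsize, prop.multiProcessorCount, warps_per_gpu);
  return 0;
}
```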
2023-04-27 18:22 UTC, push to cuda_acc_autoconf (commit aabd681):
    o Fix GPU overhead printing for my_strlen(), STRMATCH
    o Add a print for some config-time settings, e.g. GPU compute capability

2023-04-27 00:05 UTC, push to cuda_acc_autoconf (commit c9440ba):
    o Fix configure.ac and facctests/badinput.F90 to work properly when
      --enable-dummygpustartstop is specified

2023-04-26 23:03 UTC, push to cuda_acc_autoconf (commit cbe82c7):
    o gptl_device.cu: Add an ifdef on ENABLE_CONSTANTMEM, hard-wired in
      gptl_device.cu. Defaults to enabled; might be useful if there are
      complaints about constant memory.
    o configure.ac: Add --enable-dummygpustartstop, which turns GPTLstart_gpu and
      GPTLstop_gpu into just return statements. Useful for assessing GPU GPTL
      overhead. Default disabled.
    o runconfigure: Add the dummygpustartstop question. Default disabled.

2023-04-26 19:53 UTC, push to cuda_acc_autoconf (2 commits, head 8df0667):
    o Forgot to add the files from the previous commit

2023-03-09 19:52 UTC, pull request merged into master (2 commits, head 6f37714):
    Merge pull request #64 from mayeths/master
    o Bugfix for issue jmrosinski#63
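The --enable-dummygpustartstop option in the 2023-04-26 entry suggests a configure-defined macro that collapses the device-side start/stop calls to bare return statements so the overhead of the instrumentation itself can be assessed. A sketch of that ifdef pattern under assumed names; DUMMY_GPU_STARTSTOP, timer_start_gpu, and timer_stop_gpu are placeholders, not GPTL's actual macro or routine names:

```cuda
#include <cuda_runtime.h>

// timer_start_gpu/timer_stop_gpu stand in for GPTLstart_gpu/GPTLstop_gpu,
// whose real signatures are not reproduced here.

static __device__ int timer_start_gpu(int handle)
{
#ifdef DUMMY_GPU_STARTSTOP
  (void) handle;
  return 0;                 // no-op: only the cost of the call itself remains
#else
  (void) handle;
  // ... real bookkeeping (find timer, record start clock) would go here ...
  return 0;
#endif
}

static __device__ int timer_stop_gpu(int handle)
{
#ifdef DUMMY_GPU_STARTSTOP
  (void) handle;
  return 0;                 // no-op
#else
  (void) handle;
  // ... real bookkeeping (record stop clock, accumulate delta) would go here ...
  return 0;
#endif
}

__global__ void work_kernel(void)
{
  timer_start_gpu(0);
  // ... device work being timed ...
  timer_stop_gpu(0);
}

int main(void)
{
  work_kernel<<<1, 1>>>();
  return cudaDeviceSynchronize() == cudaSuccess ? 0 : 1;
}
```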