-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #147 from junjihashimoto/feature/cuda
Add inline-c-cuda package for CUDA
- Loading branch information
Showing
9 changed files
with
282 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Copyright (c) 2015 FP Complete Corporation. | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining | ||
a copy of this software and associated documentation files (the | ||
"Software"), to deal in the Software without restriction, including | ||
without limitation the rights to use, copy, modify, merge, publish, | ||
distribute, sublicense, and/or sell copies of the Software, and to | ||
permit persons to whom the Software is furnished to do so, subject to | ||
the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be | ||
included in all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | ||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | ||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Small set of utilities to inline CUDA code. See tests for example. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import Distribution.Simple | ||
main = defaultMain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
cabal-version: 2.2 | ||
name: inline-c-cuda | ||
version: 0.1.0.0 | ||
synopsis: Lets you embed CUDA code into Haskell. | ||
description: Utilities to inline CUDA code into Haskell using inline-c. See | ||
tests for example on how to build. | ||
license: MIT | ||
license-file: LICENSE | ||
author: Francesco Mazzoli | ||
maintainer: [email protected] | ||
copyright: (c) 2015-2016 FP Complete Corporation, (c) 2017-2019 Francesco Mazzoli | ||
category: FFI | ||
tested-with: GHC == 9.2.2 | ||
build-type: Simple | ||
|
||
source-repository head | ||
type: git | ||
location: https://github.com/fpco/inline-c | ||
|
||
library | ||
exposed-modules: Language.C.Inline.Cuda | ||
build-depends: base >=4.7 && <5 | ||
, bytestring | ||
, inline-c >= 0.9.0.0 | ||
, inline-c-cpp | ||
, template-haskell | ||
, text | ||
, safe-exceptions | ||
, containers | ||
, process | ||
hs-source-dirs: src | ||
default-language: Haskell2010 | ||
ghc-options: -Wall | ||
extra-libraries: cudart | ||
|
||
test-suite tests | ||
type: exitcode-stdio-1.0 | ||
hs-source-dirs: test | ||
main-is: tests.hs | ||
build-depends: base >=4 && <5 | ||
, bytestring | ||
, inline-c | ||
, inline-c-cpp | ||
, inline-c-cuda | ||
, safe-exceptions | ||
, hspec | ||
, containers | ||
, template-haskell | ||
, vector | ||
default-language: Haskell2010 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
{-# LANGUAGE QuasiQuotes #-} | ||
{-# LANGUAGE TemplateHaskell #-} | ||
{-# LANGUAGE OverloadedStrings #-} | ||
|
||
-- | Module exposing a 'Context' to inline CUDA code. We only have used | ||
-- this for experiments, so use with caution. See the CUDA tests to see | ||
-- how to build inline CUDA code. | ||
module Language.C.Inline.Cuda | ||
( module Language.C.Inline | ||
, cudaCtx | ||
, Cpp.cppTypePairs | ||
, Cpp.using | ||
, Cpp.AbstractCppExceptionPtr | ||
) where | ||
|
||
import qualified Language.Haskell.TH as TH | ||
import qualified Language.Haskell.TH.Syntax as TH | ||
|
||
import Language.C.Inline | ||
import Language.C.Inline.Context | ||
import qualified Language.C.Types as CT | ||
import qualified Language.C.Inline.Cpp as Cpp | ||
|
||
import qualified Data.Map as Map | ||
import Control.Monad.IO.Class (liftIO) | ||
import System.Exit (ExitCode(..)) | ||
import System.Process (readProcessWithExitCode) | ||
|
||
compileCuda :: String -> TH.Q FilePath | ||
compileCuda src = do | ||
cuFile <- TH.addTempFile "cu" | ||
oFile <- TH.addTempFile "o" | ||
let (cmd,args) = ("nvcc", ["-c","-o",oFile, cuFile]) | ||
(code, stdout, stderr) <- liftIO $ do | ||
writeFile cuFile src | ||
readProcessWithExitCode cmd args "" | ||
case code of | ||
ExitFailure _ -> fail $ "Compile Command: " ++ (foldl (\a b -> a ++ " " ++ b) " " (cmd : args)) ++ "\n" ++ " Output: " ++ stdout ++ "\n" ++ " Error: " ++ stderr | ||
ExitSuccess -> return oFile | ||
|
||
-- | The equivalent of 'C.baseCtx' for CUDA. It specifies the @.cu@ | ||
-- file extension for the CUDA file, so that nvcc will decide to build CUDA | ||
-- instead of C. See the @.cabal@ test target for an example on how to | ||
-- build. | ||
cudaCtx :: Context | ||
cudaCtx = Cpp.cppCtx <> mempty | ||
{ ctxForeignSrcLang = Just TH.RawObject | ||
, ctxOutput = Just $ \s -> "extern \"C\" {\n" ++ s ++ "\n}" | ||
, ctxEnableCpp = True | ||
, ctxRawObjectCompile = Just compileCuda | ||
, ctxTypesTable = Map.singleton (CT.TypeName "std::exception_ptr") [t|Cpp.AbstractCppExceptionPtr|] | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
{-# LANGUAGE CPP #-} | ||
{-# LANGUAGE ConstraintKinds #-} | ||
{-# LANGUAGE DataKinds #-} | ||
{-# LANGUAGE FlexibleContexts #-} | ||
{-# LANGUAGE FlexibleInstances #-} | ||
{-# LANGUAGE GADTs #-} | ||
{-# LANGUAGE KindSignatures #-} | ||
{-# LANGUAGE MultiParamTypeClasses #-} | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-# LANGUAGE PolyKinds #-} | ||
{-# LANGUAGE QuasiQuotes #-} | ||
{-# LANGUAGE RankNTypes #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
{-# LANGUAGE TemplateHaskell #-} | ||
{-# LANGUAGE TypeFamilies #-} | ||
{-# LANGUAGE TypeInType #-} | ||
{-# LANGUAGE TypeOperators #-} | ||
{-# LANGUAGE UndecidableInstances #-} | ||
{-# LANGUAGE TypeApplications #-} | ||
{-# OPTIONS_GHC -Wno-deprecations #-} | ||
|
||
import Control.Exception.Safe | ||
import Control.Monad | ||
import qualified Language.C.Inline.Context as CC | ||
import qualified Language.C.Types as CT | ||
import qualified Language.C.Inline.Cuda as C | ||
import qualified Test.Hspec as Hspec | ||
import Test.Hspec (shouldBe) | ||
import Foreign.Ptr (Ptr) | ||
import Data.Monoid | ||
import Foreign.Marshal.Array | ||
import Foreign.Marshal.Alloc | ||
import Foreign.Storable | ||
|
||
|
||
C.context $ C.cudaCtx | ||
|
||
C.include "<iostream>" | ||
C.include "<stdexcept>" | ||
|
||
[C.emitBlock| | ||
__global__ void | ||
vectorAdd(const float *A, const float *B, float *C, int numElements) | ||
{ | ||
int i = blockDim.x * blockIdx.x + threadIdx.x; | ||
|
||
if (i < numElements) | ||
{ | ||
C[i] = A[i] + B[i]; | ||
} | ||
} | ||
|] | ||
|
||
cudaAllocaArray :: forall b. Int -> (Ptr C.CFloat -> IO b) -> IO b | ||
cudaAllocaArray size func = do | ||
let csize = fromIntegral size | ||
alloca $ \(ptr_d_A :: Ptr (Ptr C.CFloat)) -> do | ||
[C.block| void { | ||
cudaError_t err = cudaMalloc((void **)$(float** ptr_d_A), $(int csize) * sizeof(float)); | ||
if (err != cudaSuccess) | ||
{ | ||
fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n", cudaGetErrorString(err)); | ||
exit(EXIT_FAILURE); | ||
} | ||
} |] | ||
d_A <- peekElemOff ptr_d_A 0 | ||
ret <- func d_A | ||
[C.block| void { | ||
cudaError_t err = cudaFree($(float* d_A)); | ||
if (err != cudaSuccess) | ||
{ | ||
fprintf(stderr, "Failed to free device vector A (error code %s)!\n", cudaGetErrorString(err)); | ||
exit(EXIT_FAILURE); | ||
} | ||
} |] | ||
return ret | ||
|
||
cudaMemcpyHostToDevice :: Int -> Ptr C.CFloat -> Ptr C.CFloat -> IO () | ||
cudaMemcpyHostToDevice num host device = do | ||
let cnum = fromIntegral num | ||
[C.block| void { | ||
cudaError_t err = cudaMemcpy($(float* device), $(float* host), $(int cnum) * sizeof(float), cudaMemcpyHostToDevice); | ||
if (err != cudaSuccess) | ||
{ | ||
fprintf(stderr, "Failed to copy vector from host to device (error code %s)!\n", cudaGetErrorString(err)); | ||
exit(EXIT_FAILURE); | ||
} | ||
} |] | ||
|
||
cudaMemcpyDeviceToHost :: Int -> Ptr C.CFloat -> Ptr C.CFloat -> IO () | ||
cudaMemcpyDeviceToHost num device host = do | ||
let cnum = fromIntegral num | ||
[C.block| void { | ||
cudaError_t err = cudaMemcpy($(float* host), $(float* device), $(int cnum) * sizeof(float), cudaMemcpyDeviceToHost); | ||
if (err != cudaSuccess) | ||
{ | ||
fprintf(stderr, "Failed to copy vector C from device to host (error code %s)!\n", cudaGetErrorString(err)); | ||
exit(EXIT_FAILURE); | ||
} | ||
} |] | ||
|
||
|
||
main :: IO () | ||
main = Hspec.hspec $ do | ||
Hspec.describe "Basic CUDA" $ do | ||
Hspec.it "Add vectors on device" $ do | ||
let numElements = 50000 | ||
cNumElements = fromIntegral numElements | ||
allocaArray numElements $ \(h_A :: Ptr C.CFloat) -> do | ||
allocaArray numElements $ \(h_B :: Ptr C.CFloat) -> do | ||
allocaArray numElements $ \(h_C :: Ptr C.CFloat) -> do | ||
cudaAllocaArray numElements $ \(d_A :: Ptr C.CFloat) -> do | ||
cudaAllocaArray numElements $ \(d_B :: Ptr C.CFloat) -> do | ||
cudaAllocaArray numElements $ \(d_C :: Ptr C.CFloat) -> do | ||
[C.block| void { | ||
for (int i = 0; i < $(int cNumElements); ++i) | ||
{ | ||
$(float* h_A)[i] = rand()/(float)RAND_MAX; | ||
$(float* h_B)[i] = rand()/(float)RAND_MAX; | ||
} | ||
} |] | ||
cudaMemcpyHostToDevice numElements h_A d_A | ||
cudaMemcpyHostToDevice numElements h_B d_B | ||
[C.block| void { | ||
int threadsPerBlock = 256; | ||
int blocksPerGrid =($(int cNumElements) + threadsPerBlock - 1) / threadsPerBlock; | ||
vectorAdd<<<blocksPerGrid, threadsPerBlock>>>($(float* d_A), $(float* d_B), $(float* d_C), $(int cNumElements)); | ||
} |] | ||
cudaMemcpyDeviceToHost numElements d_C h_C | ||
lA <- peekArray numElements h_A | ||
lB <- peekArray numElements h_B | ||
lC <- peekArray numElements h_C | ||
all (< 1e-5) (map (\((a,b),c) -> abs(a + b - c)) (zip (zip lA lB) lC)) `shouldBe` True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ module Language.C.Inline | |
, block | ||
, include | ||
, verbatim | ||
, emitBlock | ||
|
||
-- * 'Ptr' utils | ||
, withPtr | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters