From 07668ef008e62a8286d7cb62ff1887f2dedaa505 Mon Sep 17 00:00:00 2001 From: Rory Tyler Hayford Date: Fri, 17 May 2024 16:28:13 +0700 Subject: [PATCH 1/6] Track execution info --- .../inferno-ml-server-types.cabal | 1 + .../src/Inferno/ML/Server/Client.hs | 12 ++++ .../src/Inferno/ML/Server/Types.hs | 44 +++++++++++++ inferno-ml-server/exe/Client.hs | 4 +- inferno-ml-server/inferno-ml-server.cabal | 3 + .../src/Inferno/ML/Server/Inference.hs | 66 +++++++++++++++++-- .../src/Inferno/ML/Server/Types.hs | 16 ++++- .../src/Inferno/ML/Server/Utils.hs | 16 ++++- .../migrations/v1-create-tables.sql | 14 ++++ 9 files changed, 168 insertions(+), 8 deletions(-) diff --git a/inferno-ml-server-types/inferno-ml-server-types.cabal b/inferno-ml-server-types/inferno-ml-server-types.cabal index b9a08557..12ab2411 100644 --- a/inferno-ml-server-types/inferno-ml-server-types.cabal +++ b/inferno-ml-server-types/inferno-ml-server-types.cabal @@ -64,4 +64,5 @@ library , unix , uri-bytestring , uri-bytestring-aeson + , uuid , vector diff --git a/inferno-ml-server-types/src/Inferno/ML/Server/Client.hs b/inferno-ml-server-types/src/Inferno/ML/Server/Client.hs index 532fcda1..9cefbbea 100644 --- a/inferno-ml-server-types/src/Inferno/ML/Server/Client.hs +++ b/inferno-ml-server-types/src/Inferno/ML/Server/Client.hs @@ -13,6 +13,7 @@ where import Data.Int (Int64) import Data.Proxy (Proxy (Proxy)) +import Data.UUID (UUID) import Inferno.ML.Server.Types import Servant ((:<|>) ((:<|>))) import Servant.Client.Streaming (ClientM, client) @@ -23,8 +24,19 @@ statusC :: ClientM (Maybe ()) -- | Run an inference parameter inferenceC :: + -- | SQL identifier of the inference parameter to be run Id (InferenceParam uid gid p s) -> + -- | Optional resolution for scripts that use e.g. @valueAt@; defaults to + -- 128 if not specified Maybe Int64 -> + -- | Job identifer. This is used to save execution statistics for each + -- inference evaluation + UUID -> + -- | Note that every item in the output stream (first element of each + -- outer tuple) should be declared as writable by the corresponding + -- inference parameter. It is the responsibility of the runtime system + -- (not defined in this repository) to verify this before directing + -- the writes to their final destination ClientM (WriteStream IO) -- | Cancel the existing inference job, if it exists diff --git a/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs b/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs index 5aa1cff3..4fbb27ec 100644 --- a/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs +++ b/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs @@ -39,6 +39,7 @@ import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Text.Encoding as Text.Encoding import Data.Time (UTCTime) +import Data.UUID (UUID) import Data.Vector (Vector) import qualified Data.Vector as Vector import Data.Word (Word32, Word64) @@ -108,6 +109,7 @@ type InfernoMlServerAPI uid gid p s t = :<|> "inference" :> Capture "id" (Id (InferenceParam uid gid p s)) :> QueryParam "res" Int64 + :> QueryParam' '[Required] "uuid" UUID :> StreamPost NewlineFraming JSON (WriteStream IO) :<|> "inference" :> "cancel" :> Put '[JSON] () -- Register the bridge. This is an `inferno-ml-server` endpoint, not a @@ -643,6 +645,48 @@ instance ip ^. the @"user" & toField ] +-- | Information about execution time and resource usage. This is saved by +-- @inferno-ml-server@ after script evaluation completes and can be queried +-- later by using the same job identifier that was provided to the @/inference@ +-- route +data ExecutionInfo uid gid p = ExecutionInfo + { -- | Note that this is the job identifier provided to the inference + -- evaluation route, and is also the primary key of the database table + id :: UUID, + param :: Id (InferenceParam uid gid p VCObjectHash), + -- | When inference evaluation started + start :: UTCTime, + -- | When inference evaluation ended + end :: UTCTime, + -- | The number of bytes allocated between the @start@ and @end@ + allocated :: Word64, + -- | Additional CPU time used between the @start@ and @end@. This is + -- converted from picoseconds to milliseconds + cpu :: Word64 + } + deriving stock (Show, Eq, Generic) + deriving anyclass (FromJSON, ToJSON) + +instance FromRow (ExecutionInfo uid gid p) where + fromRow = + ExecutionInfo + <$> field + <*> field + <*> field + <*> field + <*> fmap (fromIntegral @Int64) field + <*> fmap (fromIntegral @Int64) field + +instance ToRow (ExecutionInfo uid gid p) where + toRow ei = + [ ei ^. the @"id" & toField, + ei ^. the @"param" & toField, + ei ^. the @"start" & toField, + ei ^. the @"end" & toField, + ei ^. the @"allocated" & toField, + ei ^. the @"cpu" & toField + ] + -- | A user, parameterized by the user and group types data User uid gid = User { id :: uid, diff --git a/inferno-ml-server/exe/Client.hs b/inferno-ml-server/exe/Client.hs index 10b3c35e..3485220a 100644 --- a/inferno-ml-server/exe/Client.hs +++ b/inferno-ml-server/exe/Client.hs @@ -31,6 +31,7 @@ import Servant.Client.Streaming ) import System.Exit (die) import System.Posix.Types (EpochTime) +import System.Random (randomIO) import Text.Read (readMaybe) import UnliftIO (throwString) import UnliftIO.Environment (getArgs) @@ -41,6 +42,7 @@ main = getArgs >>= \case i : _ -> do ipid <- maybe (throwString "Invalid ID") (pure . Id) $ readMaybe i + uuid <- randomIO env <- mkClientEnv <$> newManager defaultManagerSettings @@ -51,7 +53,7 @@ main = . registerBridgeC . flip BridgeInfo 9999 $ toIPv4 (127, 0, 0, 1) - withClientM (inferenceC ipid Nothing) env . either throwIO $ + withClientM (inferenceC ipid Nothing uuid) env . either throwIO $ verifyWrites (coerce ipid) _ -> die "Usage: test-client " diff --git a/inferno-ml-server/inferno-ml-server.cabal b/inferno-ml-server/inferno-ml-server.cabal index 9ec63155..0777f4ea 100644 --- a/inferno-ml-server/inferno-ml-server.cabal +++ b/inferno-ml-server/inferno-ml-server.cabal @@ -85,6 +85,7 @@ library , text , time , unliftio + , uuid , vector , wai , wai-logger @@ -132,8 +133,10 @@ executable test-client , http-client , inferno-ml-server-types , iproute + , random , servant-client , unliftio + , uuid executable dummy-bridge import: common diff --git a/inferno-ml-server/src/Inferno/ML/Server/Inference.hs b/inferno-ml-server/src/Inferno/ML/Server/Inference.hs index c27ca6d0..42fcba3c 100644 --- a/inferno-ml-server/src/Inferno/ML/Server/Inference.hs +++ b/inferno-ml-server/src/Inferno/ML/Server/Inference.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE LexicalNegation #-} +{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} @@ -26,9 +27,12 @@ import Data.Map (Map) import qualified Data.Map as Map import Data.Maybe (fromMaybe) import qualified Data.Text as Text +import Data.Time (UTCTime, getCurrentTime) import Data.Time.Clock.POSIX (getPOSIXTime) import Data.Traversable (for) +import Data.UUID (UUID) import qualified Data.Vector as Vector +import Data.Word (Word64) import Database.PostgreSQL.Simple ( Only (Only), Query, @@ -59,8 +63,11 @@ import Inferno.VersionControl.Types ( VCObject (VCFunction), ) import Lens.Micro.Platform +import System.CPUTime (getCPUTime) import System.FilePath (dropExtensions, (<.>)) +import System.Mem (getAllocationCounter, setAllocationCounter) import System.Posix.Types (EpochTime) +import UnliftIO (withRunInIO) import UnliftIO.Async (wait, withAsync) import UnliftIO.Directory ( createFileLink, @@ -86,10 +93,9 @@ import UnliftIO.Timeout (timeout) runInferenceParam :: Id InferenceParam -> Maybe Int64 -> + UUID -> RemoteM (WriteStream IO) --- FIXME / TODO Deal with default resolution, probably shouldn't need to be --- passed on all requests -runInferenceParam ipid (fromMaybe 128 -> res) = +runInferenceParam ipid (fromMaybe 128 -> res) uuid = withTimeoutMillis $ \t -> do logTrace $ RunningInference ipid t maybe (throwM (ScriptTimeout t)) pure @@ -107,8 +113,13 @@ runInferenceParam ipid (fromMaybe 128 -> res) = $ wait a -- Actually runs the inference evaluation, within the configured timeout + -- + -- NOTE: Do not fork anything else inside here; this is already running + -- in an `Async` and we want to be able to get execution statistics from + -- the runtime. Specifically, we are using `getAllocationCounter`, but + -- this only captures the allocations _in this thread only_ runInference :: Int -> RemoteM (Maybe (WriteStream IO)) - runInference tmo = timeout tmo $ do + runInference tmo = timeout tmo . withExecutionInfo $ do view #interpreter >>= readIORef >>= \case Nothing -> throwM BridgeNotRegistered Just interpreter -> do @@ -235,7 +246,52 @@ runInferenceParam ipid (fromMaybe 128 -> res) = withTimeoutMillis :: (Int -> RemoteM b) -> RemoteM b withTimeoutMillis = (view (#config . #timeout) >>=) - . (. (* 1000000) . fromIntegral) + . (. (* 1_000_000) . fromIntegral) + + withExecutionInfo :: RemoteM a -> RemoteM a + withExecutionInfo f = withRunInIO $ \r -> do + -- So allocation counter doesn't go below the lower limit, which is + -- unlikely but should be accounted for at any rate + setAllocationCounter maxBound + start <- getCurrentTime + bytes0 <- getAllocationCounter + cpu0 <- getCPUTime + ws <- r f + end <- getCurrentTime + bytes1 <- getAllocationCounter + cpu1 <- getCPUTime + + ws <$ r (saveExecutionInfo (end, start) (bytes1, bytes0) (cpu1, cpu0)) + where + saveExecutionInfo :: + -- End and start times + (UTCTime, UTCTime) -> + -- Ending and beginning byte allocation + (Int64, Int64) -> + -- Ending and beginning CPU time + (Integer, Integer) -> + RemoteM () + saveExecutionInfo (end, start) (bytes1, bytes0) (cpu1, cpu0) = + executeStore q $ + ExecutionInfo uuid ipid start end allocated cpuMillis + where + -- Note that the allocation counter counts *down*, so we need to + -- subtract the second value from the first value + allocated :: Word64 + allocated = + fromIntegral + -- In the unlikely event that more memory was freed in + -- this thread between the beginning of evaluation and + -- the end, so we don't end up with `maxBound @Word64` + . max 0 + $ bytes0 - bytes1 + + -- Convert the picoseconds of CPU time to milliseconds + cpuMillis :: Word64 + cpuMillis = fromIntegral $ (cpu1 - cpu0) `div` 1_000_000_000 + + q :: Query + q = [sql| INSERT INTO exinfo VALUES (?, ?, ?, ?, ?, ?) |] getVcObject :: VCObjectHash -> RemoteM (VCMeta VCObject) getVcObject vch = diff --git a/inferno-ml-server/src/Inferno/ML/Server/Types.hs b/inferno-ml-server/src/Inferno/ML/Server/Types.hs index 9eb7ef5f..25b4c0bf 100644 --- a/inferno-ml-server/src/Inferno/ML/Server/Types.hs +++ b/inferno-ml-server/src/Inferno/ML/Server/Types.hs @@ -68,7 +68,8 @@ import GHC.Generics (Generic) import Inferno.Core (Interpreter) import Inferno.ML.Server.Module.Types as M import "inferno-ml-server-types" Inferno.ML.Server.Types as M hiding - ( InferenceParam, + ( ExecutionInfo, + InferenceParam, InferenceScript, InfernoMlServerAPI, Model, @@ -94,6 +95,7 @@ import UnliftIO (Async) import UnliftIO.IORef (IORef) import UnliftIO.MVar (MVar) import Web.HttpApiData (FromHttpApiData, ToHttpApiData) +import Data.UUID (UUID) type RemoteM = ReaderT Env IO @@ -353,6 +355,8 @@ f ?? x = ($ x) <$> f type InferenceParam = Types.InferenceParam (EntityId UId) (EntityId GId) PID VCObjectHash +type ExecutionInfo = Types.ExecutionInfo (EntityId UId) (EntityId GId) PID + type Model = Types.Model (EntityId UId) (EntityId GId) type ModelVersion = Types.ModelVersion (EntityId UId) (EntityId GId) Oid @@ -389,6 +393,16 @@ pattern VCMeta :: pattern VCMeta t a g n d p v o = Inferno.VersionControl.Types.VCMeta t a g n d p v o +pattern ExecutionInfo :: + UUID -> + Id InferenceParam -> + UTCTime -> + UTCTime -> + Word64 -> + Word64 -> + ExecutionInfo +pattern ExecutionInfo u i s e m c = Types.ExecutionInfo u i s e m c + type InfernoMlServerAPI = Types.InfernoMlServerAPI (EntityId UId) (EntityId GId) PID VCObjectHash EpochTime diff --git a/inferno-ml-server/src/Inferno/ML/Server/Utils.hs b/inferno-ml-server/src/Inferno/ML/Server/Utils.hs index 2b2ba81e..f1247e9f 100644 --- a/inferno-ml-server/src/Inferno/ML/Server/Utils.hs +++ b/inferno-ml-server/src/Inferno/ML/Server/Utils.hs @@ -5,14 +5,22 @@ module Inferno.ML.Server.Utils firstOrThrow, queryStore, bridgeCache, + executeStore, ) where +import Control.Monad (void) import Control.Monad.Catch (Exception, MonadThrow (throwM)) import Control.Monad.IO.Class (liftIO) import Data.Generics.Labels () import Data.Vector (Vector, (!?)) -import Database.PostgreSQL.Simple (FromRow, Query, ToRow) +import Database.PostgreSQL.Simple + ( FromRow, + Query, + ToRow, + execute, + withTransaction, + ) import Database.PostgreSQL.Simple.Vector (query) import Inferno.ML.Server.Types import Lens.Micro.Platform (view) @@ -23,6 +31,12 @@ throwInfernoError = either (throwM . InfernoError . SomeInfernoError) pure queryStore :: (ToRow b, FromRow a) => Query -> b -> RemoteM (Vector a) queryStore q x = view #store >>= \conn -> liftIO $ query conn q x +executeStore :: ToRow a => Query -> a -> RemoteM () +executeStore q x = + view #store >>= \conn -> + liftIO . withTransaction conn . void $ + execute conn q x + firstOrThrow :: (MonadThrow m, Exception e) => e -> Vector a -> m a firstOrThrow e = maybe (throwM e) pure . (!? 0) diff --git a/nix/inferno-ml/migrations/v1-create-tables.sql b/nix/inferno-ml/migrations/v1-create-tables.sql index dabb87e8..374966bb 100644 --- a/nix/inferno-ml/migrations/v1-create-tables.sql +++ b/nix/inferno-ml/migrations/v1-create-tables.sql @@ -74,5 +74,19 @@ create table if not exists params , "user" integer references users (id) ); +-- Execution info for inference evaluation +create table if not exists exinfo + ( id uuid primary key + , param integer not null references params (id) + -- When inference evaluation began + , started timestamptz not null + -- When inference evaluation ended + , ended timestamptz not null + -- Number of bytes allocated in the evaluation thread + , allocated bigint not null + -- CPU time between `start` and `end`, in milliseconds + , cpu bigint not null + ); + create trigger "manage-mversion-lo" before update or delete on mversions for each row execute function lo_manage(contents); From 1f60f22407991dffcb4006e373c028632cf1494f Mon Sep 17 00:00:00 2001 From: Rory Tyler Hayford Date: Mon, 20 May 2024 11:25:08 +0700 Subject: [PATCH 2/6] Change name --- .../src/Inferno/ML/Server/Types.hs | 8 ++++---- .../src/Inferno/ML/Server/Inference.hs | 16 ++++++++-------- inferno-ml-server/src/Inferno/ML/Server/Types.hs | 10 +++++----- nix/inferno-ml/migrations/v1-create-tables.sql | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs b/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs index 4fbb27ec..b2344940 100644 --- a/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs +++ b/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs @@ -649,7 +649,7 @@ instance -- @inferno-ml-server@ after script evaluation completes and can be queried -- later by using the same job identifier that was provided to the @/inference@ -- route -data ExecutionInfo uid gid p = ExecutionInfo +data EvaluationInfo uid gid p = EvaluationInfo { -- | Note that this is the job identifier provided to the inference -- evaluation route, and is also the primary key of the database table id :: UUID, @@ -667,9 +667,9 @@ data ExecutionInfo uid gid p = ExecutionInfo deriving stock (Show, Eq, Generic) deriving anyclass (FromJSON, ToJSON) -instance FromRow (ExecutionInfo uid gid p) where +instance FromRow (EvaluationInfo uid gid p) where fromRow = - ExecutionInfo + EvaluationInfo <$> field <*> field <*> field @@ -677,7 +677,7 @@ instance FromRow (ExecutionInfo uid gid p) where <*> fmap (fromIntegral @Int64) field <*> fmap (fromIntegral @Int64) field -instance ToRow (ExecutionInfo uid gid p) where +instance ToRow (EvaluationInfo uid gid p) where toRow ei = [ ei ^. the @"id" & toField, ei ^. the @"param" & toField, diff --git a/inferno-ml-server/src/Inferno/ML/Server/Inference.hs b/inferno-ml-server/src/Inferno/ML/Server/Inference.hs index 42fcba3c..f43a71b1 100644 --- a/inferno-ml-server/src/Inferno/ML/Server/Inference.hs +++ b/inferno-ml-server/src/Inferno/ML/Server/Inference.hs @@ -119,7 +119,7 @@ runInferenceParam ipid (fromMaybe 128 -> res) uuid = -- the runtime. Specifically, we are using `getAllocationCounter`, but -- this only captures the allocations _in this thread only_ runInference :: Int -> RemoteM (Maybe (WriteStream IO)) - runInference tmo = timeout tmo . withExecutionInfo $ do + runInference tmo = timeout tmo . withEvaluationInfo $ do view #interpreter >>= readIORef >>= \case Nothing -> throwM BridgeNotRegistered Just interpreter -> do @@ -248,8 +248,8 @@ runInferenceParam ipid (fromMaybe 128 -> res) uuid = (view (#config . #timeout) >>=) . (. (* 1_000_000) . fromIntegral) - withExecutionInfo :: RemoteM a -> RemoteM a - withExecutionInfo f = withRunInIO $ \r -> do + withEvaluationInfo :: RemoteM a -> RemoteM a + withEvaluationInfo f = withRunInIO $ \r -> do -- So allocation counter doesn't go below the lower limit, which is -- unlikely but should be accounted for at any rate setAllocationCounter maxBound @@ -261,9 +261,9 @@ runInferenceParam ipid (fromMaybe 128 -> res) uuid = bytes1 <- getAllocationCounter cpu1 <- getCPUTime - ws <$ r (saveExecutionInfo (end, start) (bytes1, bytes0) (cpu1, cpu0)) + ws <$ r (saveEvaluationInfo (end, start) (bytes1, bytes0) (cpu1, cpu0)) where - saveExecutionInfo :: + saveEvaluationInfo :: -- End and start times (UTCTime, UTCTime) -> -- Ending and beginning byte allocation @@ -271,9 +271,9 @@ runInferenceParam ipid (fromMaybe 128 -> res) uuid = -- Ending and beginning CPU time (Integer, Integer) -> RemoteM () - saveExecutionInfo (end, start) (bytes1, bytes0) (cpu1, cpu0) = + saveEvaluationInfo (end, start) (bytes1, bytes0) (cpu1, cpu0) = executeStore q $ - ExecutionInfo uuid ipid start end allocated cpuMillis + EvaluationInfo uuid ipid start end allocated cpuMillis where -- Note that the allocation counter counts *down*, so we need to -- subtract the second value from the first value @@ -291,7 +291,7 @@ runInferenceParam ipid (fromMaybe 128 -> res) uuid = cpuMillis = fromIntegral $ (cpu1 - cpu0) `div` 1_000_000_000 q :: Query - q = [sql| INSERT INTO exinfo VALUES (?, ?, ?, ?, ?, ?) |] + q = [sql| INSERT INTO evalinfo VALUES (?, ?, ?, ?, ?, ?) |] getVcObject :: VCObjectHash -> RemoteM (VCMeta VCObject) getVcObject vch = diff --git a/inferno-ml-server/src/Inferno/ML/Server/Types.hs b/inferno-ml-server/src/Inferno/ML/Server/Types.hs index 25b4c0bf..77bce9a0 100644 --- a/inferno-ml-server/src/Inferno/ML/Server/Types.hs +++ b/inferno-ml-server/src/Inferno/ML/Server/Types.hs @@ -68,7 +68,7 @@ import GHC.Generics (Generic) import Inferno.Core (Interpreter) import Inferno.ML.Server.Module.Types as M import "inferno-ml-server-types" Inferno.ML.Server.Types as M hiding - ( ExecutionInfo, + ( EvaluationInfo, InferenceParam, InferenceScript, InfernoMlServerAPI, @@ -355,7 +355,7 @@ f ?? x = ($ x) <$> f type InferenceParam = Types.InferenceParam (EntityId UId) (EntityId GId) PID VCObjectHash -type ExecutionInfo = Types.ExecutionInfo (EntityId UId) (EntityId GId) PID +type EvaluationInfo = Types.EvaluationInfo (EntityId UId) (EntityId GId) PID type Model = Types.Model (EntityId UId) (EntityId GId) @@ -393,15 +393,15 @@ pattern VCMeta :: pattern VCMeta t a g n d p v o = Inferno.VersionControl.Types.VCMeta t a g n d p v o -pattern ExecutionInfo :: +pattern EvaluationInfo :: UUID -> Id InferenceParam -> UTCTime -> UTCTime -> Word64 -> Word64 -> - ExecutionInfo -pattern ExecutionInfo u i s e m c = Types.ExecutionInfo u i s e m c + EvaluationInfo +pattern EvaluationInfo u i s e m c = Types.EvaluationInfo u i s e m c type InfernoMlServerAPI = Types.InfernoMlServerAPI (EntityId UId) (EntityId GId) PID VCObjectHash EpochTime diff --git a/nix/inferno-ml/migrations/v1-create-tables.sql b/nix/inferno-ml/migrations/v1-create-tables.sql index 374966bb..738aedbd 100644 --- a/nix/inferno-ml/migrations/v1-create-tables.sql +++ b/nix/inferno-ml/migrations/v1-create-tables.sql @@ -75,7 +75,7 @@ create table if not exists params ); -- Execution info for inference evaluation -create table if not exists exinfo +create table if not exists evalinfo ( id uuid primary key , param integer not null references params (id) -- When inference evaluation began From 1319d2774f898589b5e5240584e6238987f402d9 Mon Sep 17 00:00:00 2001 From: Rory Tyler Hayford Date: Wed, 22 May 2024 13:14:59 +0700 Subject: [PATCH 3/6] Bump --- inferno-ml-server-types/CHANGELOG.md | 3 +++ inferno-ml-server-types/inferno-ml-server-types.cabal | 2 +- inferno-ml-server/CHANGELOG.md | 3 +++ inferno-ml-server/inferno-ml-server.cabal | 2 +- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/inferno-ml-server-types/CHANGELOG.md b/inferno-ml-server-types/CHANGELOG.md index d4642783..d7993081 100644 --- a/inferno-ml-server-types/CHANGELOG.md +++ b/inferno-ml-server-types/CHANGELOG.md @@ -1,6 +1,9 @@ # Revision History for inferno-ml-server-types *Note*: we use https://pvp.haskell.org/ (MAJOR.MAJOR.MINOR.PATCH) +## 0.3.0 +* Add support for tracking evaluation info + ## 0.2.0 * Add `terminated` columns for DB types diff --git a/inferno-ml-server-types/inferno-ml-server-types.cabal b/inferno-ml-server-types/inferno-ml-server-types.cabal index 12ab2411..6817b0dc 100644 --- a/inferno-ml-server-types/inferno-ml-server-types.cabal +++ b/inferno-ml-server-types/inferno-ml-server-types.cabal @@ -1,6 +1,6 @@ cabal-version: 2.4 name: inferno-ml-server-types -version: 0.2.0 +version: 0.3.0 synopsis: Types for Inferno ML server description: Types for Inferno ML server homepage: https://github.com/plow-technologies/inferno.git#readme diff --git a/inferno-ml-server/CHANGELOG.md b/inferno-ml-server/CHANGELOG.md index 2fdbe003..f692cbf9 100644 --- a/inferno-ml-server/CHANGELOG.md +++ b/inferno-ml-server/CHANGELOG.md @@ -1,3 +1,6 @@ +## 2023.5.22 +* Add support for tracking evaluation info + ## 2023.4.3 * Add `terminated` column to DB types diff --git a/inferno-ml-server/inferno-ml-server.cabal b/inferno-ml-server/inferno-ml-server.cabal index 0777f4ea..e4e64dc5 100644 --- a/inferno-ml-server/inferno-ml-server.cabal +++ b/inferno-ml-server/inferno-ml-server.cabal @@ -1,6 +1,6 @@ cabal-version: 2.4 name: inferno-ml-server -version: 2023.4.3 +version: 2023.5.22 synopsis: Server for Inferno ML description: Server for Inferno ML homepage: https://github.com/plow-technologies/inferno.git#readme From 274f6658c2b7445dbc049af5ba1af9673f3766b0 Mon Sep 17 00:00:00 2001 From: Rory Tyler Hayford Date: Wed, 22 May 2024 13:22:22 +0700 Subject: [PATCH 4/6] Don't let `SqlError` break inference eval --- .../src/Inferno/ML/Server/Inference.hs | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/inferno-ml-server/src/Inferno/ML/Server/Inference.hs b/inferno-ml-server/src/Inferno/ML/Server/Inference.hs index f43a71b1..8b993602 100644 --- a/inferno-ml-server/src/Inferno/ML/Server/Inference.hs +++ b/inferno-ml-server/src/Inferno/ML/Server/Inference.hs @@ -36,6 +36,7 @@ import Data.Word (Word64) import Database.PostgreSQL.Simple ( Only (Only), Query, + SqlError, ) import Database.PostgreSQL.Simple.SqlQQ (sql) import Foreign.C (CTime) @@ -80,7 +81,12 @@ import UnliftIO.Directory removePathForcibly, withCurrentDirectory, ) -import UnliftIO.Exception (bracket_, catchIO, displayException) +import UnliftIO.Exception + ( bracket_, + catch, + catchIO, + displayException, + ) import UnliftIO.IO.File (writeBinaryFileDurableAtomic) import UnliftIO.IORef (readIORef) import UnliftIO.MVar (putMVar, takeMVar, withMVar) @@ -272,9 +278,16 @@ runInferenceParam ipid (fromMaybe 128 -> res) uuid = (Integer, Integer) -> RemoteM () saveEvaluationInfo (end, start) (bytes1, bytes0) (cpu1, cpu0) = - executeStore q $ - EvaluationInfo uuid ipid start end allocated cpuMillis + insert `catch` logAndIgnore where + insert :: RemoteM () + insert = + executeStore q $ + EvaluationInfo uuid ipid start end allocated cpuMillis + where + q :: Query + q = [sql| INSERT INTO evalinfo VALUES (?, ?, ?, ?, ?, ?) |] + -- Note that the allocation counter counts *down*, so we need to -- subtract the second value from the first value allocated :: Word64 @@ -290,8 +303,16 @@ runInferenceParam ipid (fromMaybe 128 -> res) uuid = cpuMillis :: Word64 cpuMillis = fromIntegral $ (cpu1 - cpu0) `div` 1_000_000_000 - q :: Query - q = [sql| INSERT INTO evalinfo VALUES (?, ?, ?, ?, ?, ?) |] + -- We don't want a DB error to completely break inference + -- evaluation. Inability to store the eval info is more of + -- an inconvenience than a fatal error + logAndIgnore :: SqlError -> RemoteM () + logAndIgnore = + logTrace + . OtherWarn + . ("Failed to save eval info: " <>) + . Text.pack + . displayException getVcObject :: VCObjectHash -> RemoteM (VCMeta VCObject) getVcObject vch = From c1d8477939a1c8a535e091ebf0b3eb835d962d66 Mon Sep 17 00:00:00 2001 From: Rory Tyler Hayford Date: Wed, 22 May 2024 13:24:09 +0700 Subject: [PATCH 5/6] Haddock --- inferno-ml-server-types/src/Inferno/ML/Server/Types.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs b/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs index b2344940..e098c65a 100644 --- a/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs +++ b/inferno-ml-server-types/src/Inferno/ML/Server/Types.hs @@ -658,7 +658,11 @@ data EvaluationInfo uid gid p = EvaluationInfo start :: UTCTime, -- | When inference evaluation ended end :: UTCTime, - -- | The number of bytes allocated between the @start@ and @end@ + -- | The number of bytes allocated between the @start@ and @end@. Note + -- that this is /total/ allocation over the course of evaluation, which + -- can be many times greater than peak memory usage. Nevertheless, this + -- can be useful to track memory usage over time and across different + -- script revisions allocated :: Word64, -- | Additional CPU time used between the @start@ and @end@. This is -- converted from picoseconds to milliseconds From 1d0e0ecda005cc817aad6342948b05fb263df87b Mon Sep 17 00:00:00 2001 From: Rory Tyler Hayford Date: Wed, 22 May 2024 13:25:37 +0700 Subject: [PATCH 6/6] Formatting --- inferno-ml-server/src/Inferno/ML/Server/Types.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inferno-ml-server/src/Inferno/ML/Server/Types.hs b/inferno-ml-server/src/Inferno/ML/Server/Types.hs index 77bce9a0..069cc8a9 100644 --- a/inferno-ml-server/src/Inferno/ML/Server/Types.hs +++ b/inferno-ml-server/src/Inferno/ML/Server/Types.hs @@ -47,6 +47,7 @@ import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Text.Read as Text.Read import Data.Time (UTCTime) +import Data.UUID (UUID) import Data.Vector (Vector) import Data.Word (Word64) import Data.Yaml (decodeFileThrow) @@ -95,7 +96,6 @@ import UnliftIO (Async) import UnliftIO.IORef (IORef) import UnliftIO.MVar (MVar) import Web.HttpApiData (FromHttpApiData, ToHttpApiData) -import Data.UUID (UUID) type RemoteM = ReaderT Env IO