Skip to content

Commit

Permalink
[inferno-ml] Change input/output representation (#122)
Browse files Browse the repository at this point in the history
Changes the internal representation of `InferenceParam` inputs/outputs.
Instead of having two separate `inputs` and `outputs` as vectors, they
are now both in a single field and annotated with a `ScriptInputType`,
which controls readability/writability of the input. Also, instead of
being a `Vector`, they are now explicitly mapped to Inferno `Ident`s

The rationale for the merging `inputs`/`outputs`:
- We need to provide outputs as well as inputs as arguments during
script evaluation. If we don't do this, the identifiers used in
`makeWrites` will not resolve to anything
- We can't just concatenate the old `inputs` and `outputs` fields,
because users may wish to have identically named inputs and outputs; for
example, they may want to both read from and write to `input0`. This
would lead to the following during script eval:
  ```
  fun input0 input0 -> ...
  ``` 
- Accordingly, we need some way of allowing identically named
identifiers to be both inputs and outputs. Combining the two argument
types and storing if they are readable, writable, or both solves this

The rationale for storing the Inferno identifiers:
- When scripts are created (e.g. via an Inferno LSP server), the only
thing that is provided is the list of Inferno `Ident`s
- These `[Ident]` need to correspond exactly to the `inputs` that the
`InferenceParam` contains
- Even if we sort this list of identifiers, we need to make sure that
the order of the `InferenceParam`'s inputs is exactly the same as the
order of the original `[Ident]`. This could lead to bug-prone
assumptions or workarounds, since we would't have the original `[Ident]`
in the param
- To make sure that we always know which order is correct, we can store
the `Ident`s along with the actual inputs in a `Map` and then use
`Map.toAscList` to get the correct order for script arguments

Also I fixed the JSON encoding/decoding so that NaNs can be transmitted
to/from `inferno-ml-server`. Previously, `[null, 0.0, 0.0]` would parse
to `[IEmpty, IDouble 0.0, IDouble 0.0]`, which is of course incorrect.
Now it correctly parses to `[IDouble NaN, IDouble 0.0, IDouble 0.0]`
  • Loading branch information
ngua authored May 30, 2024
1 parent 98f8344 commit 52f28b6
Show file tree
Hide file tree
Showing 14 changed files with 190 additions and 107 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ jobs:
with:
install_url: https://releases.nixos.org/nix/nix-2.13.3/install
extra_nix_config: |
fallback = true
substituters = https://cache.nixos.org https://cache.iog.io
trusted-public-keys = cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= hydra.iohk.io:f/Ea+s+dFdN+3Y/G+FDgSq+a5NEWhJGzdjvKNGv0/EQ=
narinfo-cache-negative-ttl = 60
Expand Down
3 changes: 3 additions & 0 deletions inferno-ml-server-types/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Revision History for inferno-ml-server-types
*Note*: we use https://pvp.haskell.org/ (MAJOR.MAJOR.MINOR.PATCH)

## 0.4.0
* Change representation of script inputs/outputs

## 0.3.0
* Add support for tracking evaluation info

Expand Down
2 changes: 1 addition & 1 deletion inferno-ml-server-types/inferno-ml-server-types.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 2.4
name: inferno-ml-server-types
version: 0.3.0
version: 0.4.0
synopsis: Types for Inferno ML server
description: Types for Inferno ML server
homepage: https://github.com/plow-technologies/inferno.git#readme
Expand Down
112 changes: 87 additions & 25 deletions inferno-ml-server-types/src/Inferno/ML/Server/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import Conduit (ConduitT)
import Control.Applicative (asum, optional)
import Control.Category ((>>>))
import Control.DeepSeq (NFData (rnf), rwhnf)
import Control.Monad (void)
import Control.Monad (void, (<=<))
import Data.Aeson
import Data.Aeson.Types (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec
Expand Down Expand Up @@ -64,6 +64,7 @@ import Database.PostgreSQL.Simple.Types
)
import Foreign.C (CUInt (CUInt))
import GHC.Generics (Generic)
import Inferno.Types.Syntax (Ident)
import Inferno.Types.VersionControl
( VCObjectHash,
byteStringToVCObjectHash,
Expand Down Expand Up @@ -172,8 +173,9 @@ newtype Id a = Id Int64

-- | Row for the table containing inference script closures
data InferenceScript uid gid = InferenceScript
{ -- NOTE: This is the ID for each row, stored as a `bytea` (bytes of the hash)
{ -- | This is the ID for each row, stored as a @bytea@ (bytes of the hash)
hash :: VCObjectHash,
-- | Script closure
obj :: VCMeta uid gid VCObject
}
deriving stock (Show, Eq, Generic)
Expand Down Expand Up @@ -237,6 +239,8 @@ data Model uid gid = Model
-- | The user who owns the model, if any. Note that owning a model
-- will implicitly set permissions
user :: Maybe uid,
-- | The time that this model was \"deleted\", if any. For active models,
-- this will be @Nothing@
terminated :: Maybe UTCTime
}
deriving stock (Show, Eq, Generic)
Expand Down Expand Up @@ -341,6 +345,8 @@ data ModelVersion uid gid c = ModelVersion
-- the PSQL large object table
contents :: c,
version :: Version,
-- | The time that this model version was \"deleted\", if any. For active
-- models versions, this will be @Nothing@
terminated :: Maybe UTCTime
}
deriving stock (Show, Eq, Generic)
Expand Down Expand Up @@ -592,13 +598,24 @@ data InferenceParam uid gid p s = InferenceParam
-- (e.g. a UUID for use with @inferno-lsp@)
--
-- For existing inference params, this is the foreign key for the specific
-- script in the 'InferenceScript' table
-- script in the 'InferenceScript' table (i.e. a @VCObjectHash@)
script :: s,
-- | This needs to be linked to a specific version of a model rather
-- than the @model@ table itself
model :: Id (ModelVersion uid gid Oid),
inputs :: Vector (SingleOrMany p),
outputs :: Vector (SingleOrMany p),
-- | This is called @inputs@ but is also used for script outputs as
-- well. The access (input or output) is controlled by the 'ScriptInputType'.
-- For example, if this field is set to @[("input0", Single (p, Readable))]@,
-- the script will only have a single read-only input and will not be able to
-- write anywhere (note that we should disallow this scenario, as script
-- evaluation would not work properly)
--
-- Mapping the input\/output to the Inferno identifier helps ensure that
-- Inferno identifiers are always pointing to the correct input\/output;
-- otherwise we would need to rely on the order of the original identifiers
inputs :: Map Ident (SingleOrMany p, ScriptInputType),
-- | The time that this parameter was \"deleted\", if any. For active
-- parameters, this will be @Nothing@
terminated :: Maybe UTCTime,
user :: uid
}
Expand All @@ -619,10 +636,6 @@ instance
<$> field
<*> fmap wrappedTo (field @VCObjectHashRow)
<*> field
-- HACK / FIXME This is a pretty awful hack (storing as `jsonb`),
-- but Postgres sub-arrays need to be the same length and writing
-- a custom parser might be painful
<*> fmap getAeson field
<*> fmap getAeson field
<*> field
<*> field
Expand All @@ -638,13 +651,41 @@ instance
[ toField Default,
ip ^. the @"script" & VCObjectHashRow & toField,
ip ^. the @"model" & toField,
-- HACK / FIXME See above
ip ^. the @"inputs" & Aeson & toField,
ip ^. the @"outputs" & Aeson & toField,
toField Default,
ip ^. the @"user" & toField
]

-- | Controls input interaction within a script, i.e. ability to read from
-- and\/or write to this input. Although the term \"input\" is used, those with
-- writes enabled can also be described as \"outputs\"
data ScriptInputType
= -- | Script input can be read, but not written
Readable
| -- | Script input can be written, i.e. can be used in array of
-- write objects returned from script evaluation
Writable
| -- | Script input can be both read from and written to; this allows
-- the same script identifier to point to the same PID with both
-- types of access enabled
ReadableWritable
deriving stock (Show, Eq, Generic)
deriving anyclass (NFData)

instance FromJSON ScriptInputType where
parseJSON = withText "ScriptInputType" $ \case
"r" -> pure Readable
"w" -> pure Writable
"rw" -> pure ReadableWritable
s -> fail $ "Invalid script input type: " <> Text.unpack s

instance ToJSON ScriptInputType where
toJSON =
String . \case
Readable -> "r"
Writable -> "w"
ReadableWritable -> "rw"

-- | Information about execution time and resource usage. This is saved by
-- @inferno-ml-server@ after script evaluation completes and can be queried
-- later by using the same job identifier that was provided to the @/inference@
Expand Down Expand Up @@ -762,22 +803,42 @@ instance FromJSON IValue where
Number n -> pure . IDouble $ toRealFloat n
-- It's easier to just mark the time explicitly in an object,
-- rather than try to deal with distinguishing times and doubles
Object o -> ITime <$> o .: "time"
Object o ->
asum
[ ITime <$> o .: "time",
fmap IArray $ arrayP =<< o .: "array"
]
-- Note that this preserves a plain JSON array for tuples. But we need
-- some straightforward way of distinguishing tuples and arrays; since
-- the bridge often transmits a large number of individual tuples (times
-- and values), it's better to use arrays for the tuples and a tagged object
-- for arrays themselves; we often will only deal with one large array, and
-- adding a few bytes to this is better than adding a few bytes to thousands
-- of encoded tuples
Array a
| [x, y] <- Vector.toList a ->
(,) <$> parseJSON x <*> parseJSON y <&> \case
-- We don't want to confuse a two-element array of tuples with
-- a tuple itself. For example, `"[[10.0, {\"time\": 10}], [10.0, {\"time\": 10}]]"`
-- should parse as a two-element array of `(double, time)` tuples,
-- not as a `((double, time), (double, time))`. I can't think of
-- any reason to support the latter. An alternative would be to
-- change tuple encoding to an object, but then we would be transmitting
-- a much larger amount of data on most requests
(f@(ITuple _), s@(ITuple _)) -> IArray $ Vector.fromList [f, s]
t -> ITuple t
| otherwise -> IArray <$> traverse (parseJSON @IValue) a
fmap ITuple $ (,) <$> parseJSON x <*> parseJSON y
| otherwise -> fail "Only two-element tuples are supported"
Null -> pure IEmpty
_ -> fail "Expected one of: string, double, time, tuple, unit (empty array), array"
_ -> fail "Expected one of: string, double, time, tuple, null, array"
where
arrayP :: Vector Value -> Parser (Vector IValue)
arrayP a =
-- This is a bit tedious, but we want to make sure that the array elements
-- are homogeneous; parsing all elements to `IValue`s first can't guarantee
-- this
asum
[ -- This alternative means that `null` will be correctly parsed to NaN
-- when inside an array of doubles
fmap IDouble <$> traverse parseJSON a,
fmap ITuple <$> traverse parseJSON a,
fmap IText <$> traverse parseJSON a,
fmap ITime <$> traverse (withObject "EpochTime" (.: "time")) a,
-- Nested array support
fmap IArray
<$> traverse (withObject "IArray" (arrayP <=< (.: "array"))) a,
fail "Expected a heterogeneous array"
]

instance ToJSON IValue where
toJSON = \case
Expand All @@ -786,8 +847,9 @@ instance ToJSON IValue where
ITuple t -> toJSON t
-- See `FromJSON` instance above
ITime t -> object ["time" .= t]
-- See `FromJSON` instance above
IArray is -> object ["array" .= is]
IEmpty -> toJSON Null
IArray is -> toJSON is

-- | Used to represent inputs to the script. 'Many' allows for an array input
data SingleOrMany a
Expand Down
3 changes: 3 additions & 0 deletions inferno-ml-server/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 2023.5.29
* Change representation of script inputs/outputs

## 2023.5.22
* Add support for tracking evaluation info

Expand Down
6 changes: 4 additions & 2 deletions inferno-ml-server/exe/Dummy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ valueAt _ p t =
<&> maybe IEmpty IDouble . preview (at p . _Just . at t . _Just)

latestValueAndTimeBefore :: Int -> PID -> DummyM IValue
latestValueAndTimeBefore _ _ = throwIO $ userError "Unsupported"
latestValueAndTimeBefore _ _ =
throwIO $
userError "Unsupported: latestValueAndTimeBefore"

valuesBetween :: Int64 -> PID -> Int -> Int -> ReaderT DummyEnv IO IValue
valuesBetween _ _ _ _ = throwIO $ userError "Unsupported"
valuesBetween _ _ _ _ = throwIO $ userError "Unsupported: valuesBetween"
43 changes: 22 additions & 21 deletions inferno-ml-server/exe/ParseAndSave.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@ module ParseAndSave (main) where
import Control.Category ((>>>))
import Control.Exception (Exception (displayException))
import Control.Monad (void)
import Data.Aeson (eitherDecode)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as Char8
import qualified Data.ByteString.Lazy.Char8 as Lazy.Char8
import Data.Map.Strict (Map)
import Data.Text (Text)
import qualified Data.Text.IO as Text.IO
import Data.Time.Clock.POSIX (getPOSIXTime)
import Data.Vector (Vector)
import qualified Data.Vector as Vector
import Database.PostgreSQL.Simple
( Connection,
Query,
Expand All @@ -37,7 +38,7 @@ import Inferno.Core
import Inferno.ML.Server.Module.Prelude (mkBridgePrelude)
import Inferno.ML.Server.Types
import Inferno.ML.Types.Value (customTypes)
import Inferno.Types.Syntax (Expr, TCScheme)
import Inferno.Types.Syntax (Expr, Ident, TCScheme)
import Inferno.Types.VersionControl
( Pinned,
VCObjectHash,
Expand All @@ -50,35 +51,37 @@ import Inferno.VersionControl.Types
)
import System.Environment (getArgs)
import System.Exit (die)
import Text.Read (readMaybe)
import UnliftIO.Exception (bracket, throwString)

main :: IO ()
main =
getArgs >>= \case
scriptp : p : conns : _ ->
maybe
(throwString "Invalid PID")
(parseAndSave scriptp (Char8.pack conns) . PID)
$ readMaybe p
_ -> die "Usage ./parse <SCRIPT-PATH> <PID> <DB-STR>"

parseAndSave :: FilePath -> ByteString -> PID -> IO ()
parseAndSave p conns pid = do
scriptp : pstr : conns : _ ->
either throwString (parseAndSave scriptp (Char8.pack conns))
. eitherDecode
$ Lazy.Char8.pack pstr
_ -> die "Usage ./parse <SCRIPT-PATH> <PID-MAP-JSON> <DB-STR>"

parseAndSave ::
FilePath ->
ByteString ->
Map Ident (SingleOrMany PID, ScriptInputType) ->
IO ()
parseAndSave p conns inputs = do
t <- Text.IO.readFile p
now <- fromIntegral @Int . round <$> getPOSIXTime
ast <-
either (throwString . displayException) pure . (`parse` t)
=<< mkInferno @_ @BridgeMlValue (mkBridgePrelude funs) customTypes
bracket (connectPostgreSQL conns) close (saveScriptAndParam ast now pid)
bracket (connectPostgreSQL conns) close (saveScriptAndParam ast now inputs)

saveScriptAndParam ::
(Expr (Pinned VCObjectHash) (), TCScheme) ->
CTime ->
PID ->
Map Ident (SingleOrMany PID, ScriptInputType) ->
Connection ->
IO ()
saveScriptAndParam x now pid conn = insertScript *> insertParam
saveScriptAndParam x now inputs conn = insertScript *> insertParam
where
insertScript :: IO ()
insertScript =
Expand All @@ -96,17 +99,15 @@ saveScriptAndParam x now pid conn = insertScript *> insertParam
. InferenceParam
Nothing
hash
-- Bit of a hack. We only have one model version in the
-- tests, so we can just hard-code the ID here
(Id 1)
inputs
mempty
Nothing
$ entityIdFromInteger 0
where
q :: Query
q = [sql| INSERT INTO params VALUES (?, ?, ?, ?, ?, ?, ?) |]

inputs :: Vector (SingleOrMany PID)
inputs = Vector.singleton $ Single pid
q = [sql| INSERT INTO params VALUES (?, ?, ?, ?, ?, ?) |]

vcfunc :: VCObject
vcfunc = uncurry VCFunction x
Expand Down
6 changes: 4 additions & 2 deletions inferno-ml-server/inferno-ml-server.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 2.4
name: inferno-ml-server
version: 2023.5.22
version: 2023.5.29
synopsis: Server for Inferno ML
description: Server for Inferno ML
homepage: https://github.com/plow-technologies/inferno.git#readme
Expand Down Expand Up @@ -120,7 +120,7 @@ executable tests
executable test-client
import: common
main-is: Client.hs
hs-source-dirs: exe
hs-source-dirs: test
ghc-options: -threaded -rtsopts -main-is Client
build-depends:
, aeson
Expand Down Expand Up @@ -166,8 +166,10 @@ executable parse-and-save
hs-source-dirs: exe
ghc-options: -threaded -rtsopts -main-is ParseAndSave
build-depends:
, aeson
, base
, bytestring
, containers
, inferno-core
, inferno-ml
, inferno-ml-server
Expand Down
Loading

0 comments on commit 52f28b6

Please sign in to comment.