Grow sizes faster when we run a small number of tests.

Closes hedgehogqa#472. We now grow test size in fixed increments from 0 to 99, or as close as we can without going over, in however many tests we run. If we run more than n * 100 tests, then we go from 0 to 99 n times, and then do fixed increments for the remainder. Additionally, if we discard a bunch of times in a row we start to grow the size.
ChickenProp · Mar 14, 2024 · c65dc16 · c65dc16
1 parent 52f6c8a
commit c65dc16
Show file tree

Hide file tree

Showing 6 changed files with 205 additions and 49 deletions.
diff --git a/hedgehog/hedgehog.cabal b/hedgehog/hedgehog.cabal
@@ -137,6 +137,7 @@ test-suite test
     Test.Hedgehog.Filter
     Test.Hedgehog.Maybe
     Test.Hedgehog.Seed
+    Test.Hedgehog.Size
     Test.Hedgehog.Skip
     Test.Hedgehog.State
     Test.Hedgehog.Text

diff --git a/hedgehog/src/Hedgehog/Internal/Runner.hs b/hedgehog/src/Hedgehog/Internal/Runner.hs
@@ -41,7 +41,7 @@ import           Hedgehog.Internal.Property (Journal(..), Coverage(..), CoverCou
 import           Hedgehog.Internal.Property (Property(..), PropertyConfig(..), PropertyName(..))
 import           Hedgehog.Internal.Property (PropertyT(..), Failure(..), runTestT)
 import           Hedgehog.Internal.Property (ShrinkLimit, ShrinkRetries, withSkip)
-import           Hedgehog.Internal.Property (TerminationCriteria(..))
+import           Hedgehog.Internal.Property (TerminationCriteria(..), TestLimit(..))
 import           Hedgehog.Internal.Property (TestCount(..), PropertyCount(..))
 import           Hedgehog.Internal.Property (confidenceSuccess, confidenceFailure)
 import           Hedgehog.Internal.Property (coverageSuccess, journalCoverage)
@@ -52,7 +52,7 @@ import           Hedgehog.Internal.Region
 import           Hedgehog.Internal.Report
 import qualified Hedgehog.Internal.Seed as Seed
 import           Hedgehog.Internal.Tree (TreeT(..), NodeT(..))
-import           Hedgehog.Range (Size)
+import           Hedgehog.Range (Size(..))
 
 import           Language.Haskell.TH.Syntax (Lift)
 
@@ -206,12 +206,11 @@ checkReport ::
      MonadIO m
   => MonadCatch m
   => PropertyConfig
-  -> Size
   -> Seed
   -> PropertyT m ()
   -> (Report Progress -> m ())
   -> m (Report Result)
-checkReport cfg size0 seed0 test0 updateUI = do
+checkReport cfg seed0 test0 updateUI = do
   skip <- liftIO $ resolveSkip $ propertySkip cfg
 
   let
@@ -254,14 +253,16 @@ checkReport cfg size0 seed0 test0 updateUI = do
     loop ::
          TestCount
       -> DiscardCount
-      -> Size
       -> Seed
       -> Coverage CoverCount
       -> m (Report Result)
-    loop !tests !discards !size !seed !coverage0 = do
+    loop !tests !discards !seed !coverage0 = do
       updateUI $ Report tests discards coverage0 seed0 Running
 
       let
+        size =
+          calculateSize terminationCriteria tests discards
+
         coverageReached =
           successVerified tests coverage0
 
@@ -301,11 +302,7 @@ checkReport cfg size0 seed0 test0 updateUI = do
             failureReport $
               "Test coverage cannot be reached after " <> show tests <> " tests"
 
-      if size > 99 then
-        -- size has reached limit, reset to 0
-        loop tests discards 0 seed coverage0
-
-      else if enoughTestsRun then
+      if enoughTestsRun then
         -- at this point, we know that enough tests have been run in order to
         -- make a decision on if this was a successful run or not
         --
@@ -336,9 +333,9 @@ checkReport cfg size0 seed0 test0 updateUI = do
             -- start with the one that failed.
             (Just (n, d), _)
               | n > tests + 1 ->
-                loop (tests + 1) discards (size + 1) s1 coverage0
+                loop (tests + 1) discards s1 coverage0
               | d > discards ->
-                loop tests (discards + 1) (size + 1) s1 coverage0
+                loop tests (discards + 1) s1 coverage0
             (Just _, Just shrinkPath) -> do
               node <-
                 runTreeT . evalGenT size s0 . runTestT $ unPropertyT test
@@ -351,7 +348,7 @@ checkReport cfg size0 seed0 test0 updateUI = do
                 runTreeT . evalGenT size s0 . runTestT $ unPropertyT test
               case x of
                 Nothing ->
-                  loop tests (discards + 1) (size + 1) s1 coverage0
+                  loop tests (discards + 1) s1 coverage0
 
                 Just (Left _, _) ->
                   let
@@ -372,23 +369,80 @@ checkReport cfg size0 seed0 test0 updateUI = do
                     coverage =
                       journalCoverage journal <> coverage0
                   in
-                    loop (tests + 1) discards (size + 1) s1 coverage
+                    loop (tests + 1) discards s1 coverage
+
+  loop 0 0 seed0 mempty
+
+calculateSize :: TerminationCriteria -> TestCount -> DiscardCount -> Size
+calculateSize term (TestCount tests) (DiscardCount discards) =
+  let
+    growDiscards (Size n) =
+      -- If we're discarding a lot, try larger sizes. When we succeed, we could
+      -- reset the discard count we pass here; but we don't, because it would be
+      -- awkward to make that work with skipping. (We don't remember the order
+      -- of tests and discards.)
+      Size $ min 99 $ n + (discards `div` 10)
+
+    steppingSizer (TestLimit limit) =
+      -- `tests` runs from 0 up to `limit - 1`, so exactly `limit` tests get
+      -- run. Suppose `limit` is (100n + r). Then we do `n` cycles where size
+      -- goes 0,1,...,99, and then for the final `r` tests, we try to increase
+      -- from 0 to 99 in equal increments. So if `r` is 10 we go 0,11,22,...,99.
+      --
+      -- If we can't reach 99 we get as close as we can in equal increments
+      -- without going over, and if `r` is 1 we just run the final test at size
+      -- 0.
+      --
+      -- More examples:
+      --   r == 2: 0, 99
+      --   r == 3: 0, 49, 98
+      --   r == 4: 0, 33, 66, 99
+      --   r == 5: 0, 24, 48, 72, 96
+
+      let
+        (fullCycles, leftOvers) = limit `divMod` 100
+        (cyclesCompleted, cyclePos) = tests `divMod` 100
+      in
+        if tests >= limit then
+          error "test count is higher than limit"
+        else if cyclesCompleted < fullCycles then
+          Size cyclePos
+        else
+          -- leftOvers must be >= 1, or one of the previous branches would have
+          -- run.
+          if leftOvers == 1 then
+            Size 0
+          else
+            let step = 99 `div` (leftOvers - 1)
+            in Size $ cyclePos * step
+  in
+    growDiscards $ case term of
+      -- Run exactly `limit` tests.
+      NoConfidenceTermination limit ->
+        steppingSizer limit
+
+      -- Run exactly `limit` tests, but also use a confidence threshold for
+      -- coverage.
+      NoEarlyTermination _ limit ->
+        steppingSizer limit
 
-  loop 0 0 size0 seed0 mempty
+      -- Run some multiple of 100 tests. The TestLimit is ignored. That's likely
+      -- a bug elsewhere, but it makes this bit easy.
+      EarlyTermination _ _ ->
+        Size $ tests `mod` 100
 
 checkRegion ::
      MonadIO m
   => Region
   -> UseColor
   -> Maybe PropertyName
-  -> Size
   -> Seed
   -> Property
   -> m (Report Result)
-checkRegion region color name size seed prop =
+checkRegion region color name size prop =
   liftIO $ do
     result <-
-      checkReport (propertyConfig prop) size seed (propertyTest prop) $ \progress -> do
+      checkReport (propertyConfig prop) size (propertyTest prop) $ \progress -> do
         ppprogress <- renderProgress color name progress
         case reportStatus progress of
           Running ->
@@ -417,7 +471,7 @@ checkNamed ::
   -> m (Report Result)
 checkNamed region color name mseed prop = do
   seed <- resolveSeed mseed
-  checkRegion region color name 0 seed prop
+  checkRegion region color name seed prop
 
 -- | Check a property.
 --
@@ -434,7 +488,7 @@ recheckAt seed skip prop0 = do
   color <- detectColor
   let prop = withSkip skip prop0
   _ <- liftIO . displayRegion $ \region ->
-    checkRegion region color Nothing 0 seed prop
+    checkRegion region color Nothing seed prop
   pure ()
 
 -- | Check a group of properties using the specified runner config.

diff --git a/hedgehog/test/Test/Hedgehog/Size.hs b/hedgehog/test/Test/Hedgehog/Size.hs
@@ -0,0 +1,92 @@
+{-# LANGUAGE TemplateHaskell #-}
+
+module Test.Hedgehog.Size where
+
+import           Control.Monad                  ( void, when )
+import           Control.Monad.IO.Class         ( MonadIO(..) )
+
+import           Data.Foldable                  ( for_ )
+import qualified Data.IORef                    as IORef
+
+import           Hedgehog
+import qualified Hedgehog.Gen                  as Gen
+import qualified Hedgehog.Internal.Config      as Config
+import qualified Hedgehog.Internal.Property    as Property
+import           Hedgehog.Internal.Report       ( Report(..)
+                                                , Result(..)
+                                                )
+import qualified Hedgehog.Internal.Runner      as Runner
+
+checkProp :: MonadIO m => Property -> m (Report Result)
+checkProp prop = do
+  seed <- Config.resolveSeed Nothing
+  liftIO $ Runner.checkReport (Property.propertyConfig prop)
+                              seed
+                              (Property.propertyTest prop)
+                              (const $ pure ())
+
+checkGrowth ::
+  MonadIO m => (Property -> Property) -> [Size] -> m [Size]
+checkGrowth applyTerminationCriteria discardOn = do
+  logRef <- liftIO $ IORef.newIORef []
+
+  void $ checkProp $ applyTerminationCriteria $ property $ do
+    curSize <- forAll $ Gen.sized pure
+    liftIO $ IORef.modifyIORef' logRef (curSize :)
+    when (curSize `elem` discardOn) discard
+
+  liftIO $ reverse <$> IORef.readIORef logRef
+
+data GrowthTest =
+  GrowthTest
+    TestLimit -- ^ number of tests to run
+    [Size]    -- ^ which sizes should be discarded
+    [Size]    -- ^ the expected sizes run at (including ones discarded) for
+              --   NoConfidenceTermination and NoEarlyTermination
+    [Size]    -- ^ the expected sizes run at (including ones discarded) for
+              --   EarlyTermination
+
+growthTests :: [GrowthTest]
+growthTests =
+  [ GrowthTest 1   [] [0]                          [0 .. 99]
+  , GrowthTest 5   [] [0, 24 .. 96]                [0 .. 99]
+  , GrowthTest 10  [] [0, 11 .. 99]                [0 .. 99]
+  , GrowthTest 101 [] ([0 .. 99] ++ [0])           [0 .. 99]
+  , GrowthTest 105 [] ([0 .. 99] ++ [0, 24 .. 96]) [0 .. 99]
+  , GrowthTest 5   [24] (concat [[0], replicate 10 24, [25, 49, 73, 97]])
+                        (concat [ [0 .. 23]
+                                , replicate 10 24
+                                , [25]
+                                , [26 .. 99]
+                                , [99]
+                                ]
+                        )
+  , let discards = concat [ replicate 10 96
+                          , replicate 10 97
+                          , replicate 10 98
+                          , replicate 70 99 -- discard limit is 100
+                          ]
+    in GrowthTest 5 [96 .. 99] ([0, 24 .. 72] ++ discards)
+                               ([0 .. 95] ++ discards)
+  ]
+
+prop_GrowthTest :: Property
+prop_GrowthTest =
+  withTests 1 . property $ do
+    for_ growthTests $
+      \(GrowthTest testLimit discardOn expected1 expected2) -> do
+        let noConfidenceTerm = withTests testLimit
+        sizes1 <- checkGrowth noConfidenceTerm discardOn
+        sizes1 === expected1
+
+        let noEarlyTerm = withConfidence 1000 . noConfidenceTerm
+        sizes2 <- checkGrowth noEarlyTerm discardOn
+        sizes2 === expected1
+
+        let earlyTerm = verifiedTermination . noEarlyTerm
+        sizes3 <- checkGrowth earlyTerm discardOn
+        sizes3 === expected2
+
+tests :: IO Bool
+tests =
+  checkParallel $$(discover)