Skip to content

Commit

Permalink
Grow sizes faster when we run a small number of tests.
Browse files Browse the repository at this point in the history
Closes hedgehogqa#472. We now grow test size in fixed increments from 0 to 99, or
as close as we can without going over, in however many tests we run. If
we run more than n * 100 tests, then we go from 0 to 99 n times, and
then do fixed increments for the remainder. Additionally, if we discard
a bunch of times in a row we start to grow the size.
  • Loading branch information
ChickenProp committed Mar 14, 2024
1 parent 52f6c8a commit c65dc16
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 49 deletions.
1 change: 1 addition & 0 deletions hedgehog/hedgehog.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ test-suite test
Test.Hedgehog.Filter
Test.Hedgehog.Maybe
Test.Hedgehog.Seed
Test.Hedgehog.Size
Test.Hedgehog.Skip
Test.Hedgehog.State
Test.Hedgehog.Text
Expand Down
96 changes: 75 additions & 21 deletions hedgehog/src/Hedgehog/Internal/Runner.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import Hedgehog.Internal.Property (Journal(..), Coverage(..), CoverCou
import Hedgehog.Internal.Property (Property(..), PropertyConfig(..), PropertyName(..))
import Hedgehog.Internal.Property (PropertyT(..), Failure(..), runTestT)
import Hedgehog.Internal.Property (ShrinkLimit, ShrinkRetries, withSkip)
import Hedgehog.Internal.Property (TerminationCriteria(..))
import Hedgehog.Internal.Property (TerminationCriteria(..), TestLimit(..))
import Hedgehog.Internal.Property (TestCount(..), PropertyCount(..))
import Hedgehog.Internal.Property (confidenceSuccess, confidenceFailure)
import Hedgehog.Internal.Property (coverageSuccess, journalCoverage)
Expand All @@ -52,7 +52,7 @@ import Hedgehog.Internal.Region
import Hedgehog.Internal.Report
import qualified Hedgehog.Internal.Seed as Seed
import Hedgehog.Internal.Tree (TreeT(..), NodeT(..))
import Hedgehog.Range (Size)
import Hedgehog.Range (Size(..))

import Language.Haskell.TH.Syntax (Lift)

Expand Down Expand Up @@ -206,12 +206,11 @@ checkReport ::
MonadIO m
=> MonadCatch m
=> PropertyConfig
-> Size
-> Seed
-> PropertyT m ()
-> (Report Progress -> m ())
-> m (Report Result)
checkReport cfg size0 seed0 test0 updateUI = do
checkReport cfg seed0 test0 updateUI = do
skip <- liftIO $ resolveSkip $ propertySkip cfg

let
Expand Down Expand Up @@ -254,14 +253,16 @@ checkReport cfg size0 seed0 test0 updateUI = do
loop ::
TestCount
-> DiscardCount
-> Size
-> Seed
-> Coverage CoverCount
-> m (Report Result)
loop !tests !discards !size !seed !coverage0 = do
loop !tests !discards !seed !coverage0 = do
updateUI $ Report tests discards coverage0 seed0 Running

let
size =
calculateSize terminationCriteria tests discards

coverageReached =
successVerified tests coverage0

Expand Down Expand Up @@ -301,11 +302,7 @@ checkReport cfg size0 seed0 test0 updateUI = do
failureReport $
"Test coverage cannot be reached after " <> show tests <> " tests"

if size > 99 then
-- size has reached limit, reset to 0
loop tests discards 0 seed coverage0

else if enoughTestsRun then
if enoughTestsRun then
-- at this point, we know that enough tests have been run in order to
-- make a decision on if this was a successful run or not
--
Expand Down Expand Up @@ -336,9 +333,9 @@ checkReport cfg size0 seed0 test0 updateUI = do
-- start with the one that failed.
(Just (n, d), _)
| n > tests + 1 ->
loop (tests + 1) discards (size + 1) s1 coverage0
loop (tests + 1) discards s1 coverage0
| d > discards ->
loop tests (discards + 1) (size + 1) s1 coverage0
loop tests (discards + 1) s1 coverage0
(Just _, Just shrinkPath) -> do
node <-
runTreeT . evalGenT size s0 . runTestT $ unPropertyT test
Expand All @@ -351,7 +348,7 @@ checkReport cfg size0 seed0 test0 updateUI = do
runTreeT . evalGenT size s0 . runTestT $ unPropertyT test
case x of
Nothing ->
loop tests (discards + 1) (size + 1) s1 coverage0
loop tests (discards + 1) s1 coverage0

Just (Left _, _) ->
let
Expand All @@ -372,23 +369,80 @@ checkReport cfg size0 seed0 test0 updateUI = do
coverage =
journalCoverage journal <> coverage0
in
loop (tests + 1) discards (size + 1) s1 coverage
loop (tests + 1) discards s1 coverage

loop 0 0 seed0 mempty

calculateSize :: TerminationCriteria -> TestCount -> DiscardCount -> Size
calculateSize term (TestCount tests) (DiscardCount discards) =
let
growDiscards (Size n) =
-- If we're discarding a lot, try larger sizes. When we succeed, we could
-- reset the discard count we pass here; but we don't, because it would be
-- awkward to make that work with skipping. (We don't remember the order
-- of tests and discards.)
Size $ min 99 $ n + (discards `div` 10)

steppingSizer (TestLimit limit) =
-- `tests` runs from 0 up to `limit - 1`, so exactly `limit` tests get
-- run. Suppose `limit` is (100n + r). Then we do `n` cycles where size
-- goes 0,1,...,99, and then for the final `r` tests, we try to increase
-- from 0 to 99 in equal increments. So if `r` is 10 we go 0,11,22,...,99.
--
-- If we can't reach 99 we get as close as we can in equal increments
-- without going over, and if `r` is 1 we just run the final test at size
-- 0.
--
-- More examples:
-- r == 2: 0, 99
-- r == 3: 0, 49, 98
-- r == 4: 0, 33, 66, 99
-- r == 5: 0, 24, 48, 72, 96

let
(fullCycles, leftOvers) = limit `divMod` 100
(cyclesCompleted, cyclePos) = tests `divMod` 100
in
if tests >= limit then
error "test count is higher than limit"
else if cyclesCompleted < fullCycles then
Size cyclePos
else
-- leftOvers must be >= 1, or one of the previous branches would have
-- run.
if leftOvers == 1 then
Size 0
else
let step = 99 `div` (leftOvers - 1)
in Size $ cyclePos * step
in
growDiscards $ case term of
-- Run exactly `limit` tests.
NoConfidenceTermination limit ->
steppingSizer limit

-- Run exactly `limit` tests, but also use a confidence threshold for
-- coverage.
NoEarlyTermination _ limit ->
steppingSizer limit

loop 0 0 size0 seed0 mempty
-- Run some multiple of 100 tests. The TestLimit is ignored. That's likely
-- a bug elsewhere, but it makes this bit easy.
EarlyTermination _ _ ->
Size $ tests `mod` 100

checkRegion ::
MonadIO m
=> Region
-> UseColor
-> Maybe PropertyName
-> Size
-> Seed
-> Property
-> m (Report Result)
checkRegion region color name size seed prop =
checkRegion region color name size prop =
liftIO $ do
result <-
checkReport (propertyConfig prop) size seed (propertyTest prop) $ \progress -> do
checkReport (propertyConfig prop) size (propertyTest prop) $ \progress -> do
ppprogress <- renderProgress color name progress
case reportStatus progress of
Running ->
Expand Down Expand Up @@ -417,7 +471,7 @@ checkNamed ::
-> m (Report Result)
checkNamed region color name mseed prop = do
seed <- resolveSeed mseed
checkRegion region color name 0 seed prop
checkRegion region color name seed prop

-- | Check a property.
--
Expand All @@ -434,7 +488,7 @@ recheckAt seed skip prop0 = do
color <- detectColor
let prop = withSkip skip prop0
_ <- liftIO . displayRegion $ \region ->
checkRegion region color Nothing 0 seed prop
checkRegion region color Nothing seed prop
pure ()

-- | Check a group of properties using the specified runner config.
Expand Down
92 changes: 92 additions & 0 deletions hedgehog/test/Test/Hedgehog/Size.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{-# LANGUAGE TemplateHaskell #-}

module Test.Hedgehog.Size where

import Control.Monad ( void, when )
import Control.Monad.IO.Class ( MonadIO(..) )

import Data.Foldable ( for_ )
import qualified Data.IORef as IORef

import Hedgehog
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Internal.Config as Config
import qualified Hedgehog.Internal.Property as Property
import Hedgehog.Internal.Report ( Report(..)
, Result(..)
)
import qualified Hedgehog.Internal.Runner as Runner

checkProp :: MonadIO m => Property -> m (Report Result)
checkProp prop = do
seed <- Config.resolveSeed Nothing
liftIO $ Runner.checkReport (Property.propertyConfig prop)
seed
(Property.propertyTest prop)
(const $ pure ())

checkGrowth ::
MonadIO m => (Property -> Property) -> [Size] -> m [Size]
checkGrowth applyTerminationCriteria discardOn = do
logRef <- liftIO $ IORef.newIORef []

void $ checkProp $ applyTerminationCriteria $ property $ do
curSize <- forAll $ Gen.sized pure
liftIO $ IORef.modifyIORef' logRef (curSize :)
when (curSize `elem` discardOn) discard

liftIO $ reverse <$> IORef.readIORef logRef

data GrowthTest =
GrowthTest
TestLimit -- ^ number of tests to run
[Size] -- ^ which sizes should be discarded
[Size] -- ^ the expected sizes run at (including ones discarded) for
-- NoConfidenceTermination and NoEarlyTermination
[Size] -- ^ the expected sizes run at (including ones discarded) for
-- EarlyTermination

growthTests :: [GrowthTest]
growthTests =
[ GrowthTest 1 [] [0] [0 .. 99]
, GrowthTest 5 [] [0, 24 .. 96] [0 .. 99]
, GrowthTest 10 [] [0, 11 .. 99] [0 .. 99]
, GrowthTest 101 [] ([0 .. 99] ++ [0]) [0 .. 99]
, GrowthTest 105 [] ([0 .. 99] ++ [0, 24 .. 96]) [0 .. 99]
, GrowthTest 5 [24] (concat [[0], replicate 10 24, [25, 49, 73, 97]])
(concat [ [0 .. 23]
, replicate 10 24
, [25]
, [26 .. 99]
, [99]
]
)
, let discards = concat [ replicate 10 96
, replicate 10 97
, replicate 10 98
, replicate 70 99 -- discard limit is 100
]
in GrowthTest 5 [96 .. 99] ([0, 24 .. 72] ++ discards)
([0 .. 95] ++ discards)
]

prop_GrowthTest :: Property
prop_GrowthTest =
withTests 1 . property $ do
for_ growthTests $
\(GrowthTest testLimit discardOn expected1 expected2) -> do
let noConfidenceTerm = withTests testLimit
sizes1 <- checkGrowth noConfidenceTerm discardOn
sizes1 === expected1

let noEarlyTerm = withConfidence 1000 . noConfidenceTerm
sizes2 <- checkGrowth noEarlyTerm discardOn
sizes2 === expected1

let earlyTerm = verifiedTermination . noEarlyTerm
sizes3 <- checkGrowth earlyTerm discardOn
sizes3 === expected2

tests :: IO Bool
tests =
checkParallel $$(discover)
Loading

0 comments on commit c65dc16

Please sign in to comment.