diff --git a/src/record_ts.cpp b/src/record_ts.cpp index 0607c787..3e07f7be 100644 --- a/src/record_ts.cpp +++ b/src/record_ts.cpp @@ -90,7 +90,7 @@ struct ReplayVariable { } /** * Allocates the data for this replay variable. - * If this is attempted twice, we test weather the allocated size is + * If this is attempted twice, we test whether the allocated size is * sufficient and re-allocate the memory if necessary. */ void alloc(JitBackend backend, size_t dsize) { diff --git a/src/record_ts.h b/src/record_ts.h index 478bf8c8..697b1e87 100644 --- a/src/record_ts.h +++ b/src/record_ts.h @@ -9,6 +9,26 @@ #include "var.h" #include +void jitc_freeze_start(JitBackend backend, const uint32_t *inputs, + uint32_t n_inputs); + +Recording *jitc_freeze_stop(JitBackend backend, const uint32_t *outputs, + uint32_t n_outputs); + +void jitc_freeze_abort(JitBackend backend); + +void jitc_freeze_destroy(Recording *recording); + +bool jitc_freeze_pause(JitBackend backend); + +bool jitc_freeze_resume(JitBackend backend); + +void jitc_freeze_replay(Recording *recording, const uint32_t *inputs, + uint32_t *outputs); + +int jitc_freeze_dry_run(Recording *recording, const uint32_t *inputs, + uint32_t *outputs); + /// HashMap, mapping an allocation to a recorded variable using PtrToSlot = tsl::robin_map; @@ -63,11 +83,12 @@ struct Operation { struct { /// The reduce type of a prefix reduction operation ReduceOp rtype; - /// Weather a prefix sum operation is exclusive + /// Whether a prefix sum operation is exclusive bool exclusive; bool reverse; } prefix_reduce; /// The bucket count for the mkperm operation + /// The function has to be re-recorded when the bucket count changes. uint32_t bucket_count; /// Additional data such as the source of memset uint64_t data; @@ -77,13 +98,15 @@ struct Operation { /// Records the size of the largest input variable (directly accessed or /// through a pointer if the kernel has no direct inputs). 
size_t input_size = 0; - /// Weather this operation is enabled. We might have to disable some + /// Whether this operation is enabled. We might have to disable some /// operations after the fact, and removing them from the Recording would be /// more complicated. bool enabled = true; /// Does this operation use optix? bool uses_optix = false; - /// A copy of the shader binding table, used by the kernel. + /// A copy of the shader binding table including a deepcopy of its hit- and + /// miss- groups, used by the kernel. The callables are filled in by the \c + /// CUDAThreadState::launch function. OptixShaderBindingTable *sbt; }; @@ -91,7 +114,6 @@ struct Operation { /// /// Output variables are only tracked through the outputs array, as this /// information is only needed when constructing the output variables. -/// enum class RecordVarState { /// This variable was not initialized Uninit, @@ -99,7 +121,12 @@ enum class RecordVarState { OpOutput, /// This variable is part of the function input Input, - /// This variable has been captured + /// This variable has been captured i.e. it is copied and part of the + /// recording. + /// For example, the offset buffer of a vcall should not change between + /// recording and replay and can be copied. + /// Captured variables are immutable and copied when replaying, so that they + /// are not changed by the replaying kernels. Captured, }; @@ -199,7 +226,7 @@ struct ParamInfo { * \brief Represents a frozen function recording. And can be used to replay it. */ struct Recording { - /// Weather this recording requires a dryrun, to discover the size of + /// Whether this recording requires a dryrun, to discover the size of /// certain operations. bool requires_dry_run = false; @@ -738,7 +765,6 @@ struct RecordThreadState : ThreadState { * This is used by the input variables of a kernel. 
*/ uint32_t add_variable(const void *ptr, RecordVariable rv) { - rv.ptr = ptr; auto it = this->ptr_to_slot.find(ptr); @@ -760,7 +786,7 @@ struct RecordThreadState : ThreadState { } /// Return the slot index given the data pointer of a variable. - /// This fails if the variable has not been added. + /// This fails if the variable has not been previously added. uint32_t get_variable(const void *ptr) { auto it = this->ptr_to_slot.find(ptr); @@ -802,8 +828,10 @@ struct RecordThreadState : ThreadState { if (info.test_uninit && rv.state == RecordVarState::Uninit) jitc_raise("record(): Varaible at slot s%u was read from by " "operation o%u, but has not yet been initialized! " - "This can happen if the variable was not part of " - "the input but is used by an recorded operation.", + "This can occur if the variable was not part of " + "the input but is used by a recorded operation, for " + "example if it was not specified as a member in a " + "DRJIT_STRUCT but used in the frozen function.", info.slot, (uint32_t) this->m_recording.operations.size()); @@ -829,8 +857,7 @@ struct RecordThreadState : ThreadState { uint32_t slot = this->get_variable(ptr); add_in_param(slot, vtype, test_uninit); } - /// Helper function recording an output access, given the slot and \ref - /// VarType + /// Helper function recording an output access, given the slot and \ref VarType void add_out_param(uint32_t slot, VarType vtype) { ParamInfo info; info.type = ParamType::Output; @@ -838,8 +865,7 @@ struct RecordThreadState : ThreadState { info.vtype = vtype; add_param(info); } - /// Helper function recording an output access, given the pointer and \ref - /// VarType + /// Helper function recording an output access, given the pointer and \ref VarType void add_out_param(const void *ptr, VarType vtype) { RecordVariable rv; uint32_t slot = this->add_variable(ptr, rv); @@ -851,23 +877,3 @@ struct RecordThreadState : ThreadState { add_out_param(slot, (VarType) vtype); } }; - -void 
jitc_freeze_start(JitBackend backend, const uint32_t *inputs, - uint32_t n_inputs); - -Recording *jitc_freeze_stop(JitBackend backend, const uint32_t *outputs, - uint32_t n_outputs); - -void jitc_freeze_abort(JitBackend backend); - -void jitc_freeze_destroy(Recording *recording); - -bool jitc_freeze_pause(JitBackend backend); - -bool jitc_freeze_resume(JitBackend backend); - -void jitc_freeze_replay(Recording *recording, const uint32_t *inputs, - uint32_t *outputs); - -int jitc_freeze_dry_run(Recording *recording, const uint32_t *inputs, - uint32_t *outputs); diff --git a/tests/record.cpp b/tests/record.cpp index 1f191afd..fd3be2b6 100644 --- a/tests/record.cpp +++ b/tests/record.cpp @@ -241,7 +241,7 @@ TEST_BOTH(04_deduplicating_output) { } /** - * This tests, weather it is possible to record multiple kernels in sequence. + * This tests whether it is possible to record multiple kernels in sequence. * The input of the second kernel relies on the execution of the first. * On LLVM, the correctness of barrier operations is therefore tested. */ @@ -296,7 +296,7 @@ TEST_BOTH(05_sequential_kernels) { } /** - * This tests, weather it is possible to record multiple independent kernels in + * This tests whether it is possible to record multiple independent kernels in * the same recording. * The variables of the kernels are of different size, therefore two kernels are * generated. At replay these can be executed in parallel (LLVM) or sequence