Skip to content

Commit

Permalink
Addressed most comments from @merlinND review 2nd part
Browse files Browse the repository at this point in the history
  • Loading branch information
DoeringChristian committed Nov 4, 2024
1 parent 5d97c79 commit 7ea730c
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 37 deletions.
2 changes: 1 addition & 1 deletion src/record_ts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct ReplayVariable {
}
/**
* Allocates the data for this replay variable.
* If this is attempted twice, we test weather the allocated size is
* If this is attempted twice, we test Whether the allocated size is
* sufficient and re-allocate the memory if necessary.
*/
void alloc(JitBackend backend, size_t dsize) {
Expand Down
74 changes: 40 additions & 34 deletions src/record_ts.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,26 @@
#include "var.h"
#include <cstdint>

void jitc_freeze_start(JitBackend backend, const uint32_t *inputs,
uint32_t n_inputs);

Recording *jitc_freeze_stop(JitBackend backend, const uint32_t *outputs,
uint32_t n_outputs);

void jitc_freeze_abort(JitBackend backend);

void jitc_freeze_destroy(Recording *recording);

bool jitc_freeze_pause(JitBackend backend);

bool jitc_freeze_resume(JitBackend backend);

void jitc_freeze_replay(Recording *recording, const uint32_t *inputs,
uint32_t *outputs);

int jitc_freeze_dry_run(Recording *recording, const uint32_t *inputs,
uint32_t *outputs);

/// HashMap, mapping an allocation to a recorded variable
using PtrToSlot = tsl::robin_map<const void *, uint32_t, PointerHasher>;

Expand Down Expand Up @@ -63,11 +83,12 @@ struct Operation {
struct {
/// The reduce type of a prefix reduction operation
ReduceOp rtype;
/// Weather a prefix sum operation is exclusive
/// Whether a prefix sum operation is exclusive
bool exclusive;
bool reverse;
} prefix_reduce;
/// The bucket count for the mkperm operation
/// The function has to be re-recorded when the bucket count changes.
uint32_t bucket_count;
/// Additional data such as the source of memset
uint64_t data;
Expand All @@ -77,29 +98,35 @@ struct Operation {
/// Records the size of the largest input variable (directly accessed or
/// through a pointer if the kernel has no direct inputs).
size_t input_size = 0;
/// Weather this operation is enabled. We might have to disable some
/// Whether this operation is enabled. We might have to disable some
/// operations after the fact, and removing them from the Recording would be
/// more complicated.
bool enabled = true;
/// Does this operation use optix?
bool uses_optix = false;
/// A copy of the shader binding table, used by the kernel.
/// A copy of the shader binding table including a deepcopy of its hit- and
/// miss- groups, used by the kernel. The callables are filled in by the \c
/// CUDAThreadState::launch function.
OptixShaderBindingTable *sbt;
};

/// Denotes the type of variable.
///
/// Output variables are only tracked through the outputs array, as this
/// information is only needed when constructing the output variables.
///
enum class RecordVarState {
/// This variable was not initialized
Uninit,
/// This variable has been created by an operation
OpOutput,
/// This variable is part of the function input
Input,
/// This variable has been captured
/// This variable has been captured i.e. it is copied and part of the
/// recording.
/// For example, the offset buffer of a vcall should not change between
/// recording and replay and can be copied.
/// Captured variables are immutable and copied when replaying, so that they
/// are not changed by the replaying kernels.
Captured,
};

Expand Down Expand Up @@ -199,7 +226,7 @@ struct ParamInfo {
* \brief Represents a frozen function recording. And can be used to replay it.
*/
struct Recording {
/// Weather this recording requires a dryrun, to discover the size of
/// Whether this recording requires a dryrun, to discover the size of
/// certain operations.
bool requires_dry_run = false;

Expand Down Expand Up @@ -738,7 +765,6 @@ struct RecordThreadState : ThreadState {
* This is used by the input variables of a kernel.
*/
uint32_t add_variable(const void *ptr, RecordVariable rv) {

rv.ptr = ptr;
auto it = this->ptr_to_slot.find(ptr);

Expand All @@ -760,7 +786,7 @@ struct RecordThreadState : ThreadState {
}

/// Return the slot index given the data pointer of a variable.
/// This fails if the variable has not been added.
/// This fails if the variable has not been previously added.
uint32_t get_variable(const void *ptr) {
auto it = this->ptr_to_slot.find(ptr);

Expand Down Expand Up @@ -802,8 +828,10 @@ struct RecordThreadState : ThreadState {
if (info.test_uninit && rv.state == RecordVarState::Uninit)
jitc_raise("record(): Varaible at slot s%u was read from by "
"operation o%u, but has not yet been initialized! "
"This can happen if the variable was not part of "
"the input but is used by an recorded operation.",
"This can occur if the variable was not part of "
"the input but is used by a recorded operation, for "
"example if it was not specified as a member in a "
"DRJIT_STRUCT but used in the frozen function.",
info.slot,
(uint32_t) this->m_recording.operations.size());

Expand All @@ -829,17 +857,15 @@ struct RecordThreadState : ThreadState {
uint32_t slot = this->get_variable(ptr);
add_in_param(slot, vtype, test_uninit);
}
/// Helper function recording an output access, given the slot and \ref
/// VarType
/// Helper function recording an output access, given the slot and \ref VarType
void add_out_param(uint32_t slot, VarType vtype) {
ParamInfo info;
info.type = ParamType::Output;
info.slot = slot;
info.vtype = vtype;
add_param(info);
}
/// Helper function recording an output access, given the pointer and \ref
/// VarType
/// Helper function recording an output access, given the pointer and \ref VarType
void add_out_param(const void *ptr, VarType vtype) {
RecordVariable rv;
uint32_t slot = this->add_variable(ptr, rv);
Expand All @@ -851,23 +877,3 @@ struct RecordThreadState : ThreadState {
add_out_param(slot, (VarType) vtype);
}
};

void jitc_freeze_start(JitBackend backend, const uint32_t *inputs,
uint32_t n_inputs);

Recording *jitc_freeze_stop(JitBackend backend, const uint32_t *outputs,
uint32_t n_outputs);

void jitc_freeze_abort(JitBackend backend);

void jitc_freeze_destroy(Recording *recording);

bool jitc_freeze_pause(JitBackend backend);

bool jitc_freeze_resume(JitBackend backend);

void jitc_freeze_replay(Recording *recording, const uint32_t *inputs,
uint32_t *outputs);

int jitc_freeze_dry_run(Recording *recording, const uint32_t *inputs,
uint32_t *outputs);
4 changes: 2 additions & 2 deletions tests/record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ TEST_BOTH(04_deduplicating_output) {
}

/**
* This tests, weather it is possible to record multiple kernels in sequence.
* This tests, Whether it is possible to record multiple kernels in sequence.
* The input of the second kernel relies on the execution of the first.
* On LLVM, the correctness of barrier operations is therefore tested.
*/
Expand Down Expand Up @@ -296,7 +296,7 @@ TEST_BOTH(05_sequential_kernels) {
}

/**
* This tests, weather it is possible to record multiple independent kernels in
* This tests, Whether it is possible to record multiple independent kernels in
* the same recording.
* The variables of the kernels are of different size, therefore two kernels are
* generated. At replay these can be executed in parallel (LLVM) or sequence
Expand Down

0 comments on commit 7ea730c

Please sign in to comment.