diff --git a/include/drjit-core/jit.h b/include/drjit-core/jit.h index bdfdfc32..5145262f 100644 --- a/include/drjit-core/jit.h +++ b/include/drjit-core/jit.h @@ -2480,7 +2480,7 @@ extern JIT_EXPORT uint32_t jit_array_write(uint32_t target, uint32_t offset, extern JIT_EXPORT uint32_t jit_array_read(uint32_t source, uint32_t offset, uint32_t mask); -/// Opaque data structure, storing the recodring of a thread state +/// Opaque data structure, storing the recording of a thread state struct Recording; /** @@ -2491,9 +2491,12 @@ struct Recording; * The backend for which recording should be started. * * \param inputs - * An array of input variable indices. - * They have to be specified before starting the recording and can be - * changed when replaying the recording. + * An array of input variable indices, which have to be + * specified when starting the recording. + * Different indices, representing other variables, + * might be provided when replaying. + * This function borrows the indices and does not + * increment their refcount. * * \param n_inputs * The number of input variables for the recording @@ -2506,13 +2509,13 @@ jit_freeze_start(JitBackend backend, const uint32_t *inputs, uint32_t n_inputs); * recording. * * The recording is returned as an opaque pointer and has to be destroyed - * afterwards. + * afterwards using the \c jit_freeze_destroy function. * * \param backend * The backend on which recording should be stoped. * * \param outputs - * An array of output variable indieces. + * An array of output variable indices. * When replaying these variables are returned from the replay function. * * \param n_outputs @@ -2527,7 +2530,7 @@ extern JIT_EXPORT Recording *jit_freeze_stop(JitBackend backend, * * Replaying a recording with different inputs results in different output * variables. - * They get put into the outputs array. + * Their variable indices get written into the outputs array. * * \param recording * The recording to replay given different inputs. 
@@ -2548,11 +2551,14 @@ extern JIT_EXPORT void jit_freeze_replay(Recording *recording, uint32_t *outputs); /** - * \brief Perform a dry run of a recording (if required), which does not perform - * any actual work. + * \brief Perform a dry run replay of a recording (if required), which does not + * perform any actual work. * * Dry running the recording, calculates the widths of all kernels, and might * return 0 if the function has to be re-recorded. + * This is required to catch cases where the size of an input variable changes + * the compiled kernels, for example when performing scatter reductions in LLVM + * mode. * * \param recording * The recording to replay given different inputs. @@ -2571,7 +2577,7 @@ extern JIT_EXPORT void jit_freeze_replay(Recording *recording, * No actual changes are done to the variables in dry-run mode. * * - * \return false if retracing the function is required, true otherwise + * \return 0 if retracing the function is required, 1 otherwise */ extern JIT_EXPORT int jit_freeze_dry_run(Recording *recording, const uint32_t *inputs, @@ -2579,27 +2585,35 @@ extern JIT_EXPORT int jit_freeze_dry_run(Recording *recording, /** * \brief Pause recording the ThreadState for this backend. - * Returns true if recording has already been paused. + * + * Returns a boolean indicating the pause state before calling + * this function, i.e. returns ``true`` if recording was already paused. + * If no recording is in progress, this function fails. * * \param backend * The backend for which to pause recording the thread state. */ -extern JIT_EXPORT bool jit_freeze_pause(JitBackend backend); +extern JIT_EXPORT int jit_freeze_pause(JitBackend backend); /** * \brief Resume recording the ThreadState for this backend. - * Returns true if recording has already been resumed - * or never paused. + * + * Returns a boolean indicating the pause state before calling + * this function, i.e. returns ``true`` if recording was already paused. 
+ * If no recording is in progress, this function fails. * * \param backend * The backend for which to pause recording the thread state. */ -extern JIT_EXPORT bool jit_freeze_resume(JitBackend backend); +extern JIT_EXPORT int jit_freeze_resume(JitBackend backend); /** * \brief Abort recording the ThreadState for this backend. - * This will swap out the RecordThreadState for it's - * internal thread state, without saving the recording. + * + * This will abort the recording process and restore the state to the state it + * was in before starting the recording. + * Aborting a recording has the same effect as never starting the recording. + * If no recording is in progress, this function will run without effect. * * \param backend * The backend for which to abort recording the thread state. diff --git a/src/api.cpp b/src/api.cpp index 43fe32f8..ff98cd79 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -1516,12 +1516,12 @@ Recording *jit_freeze_stop(JitBackend backend, const uint32_t *outputs, return jitc_freeze_stop(backend, outputs, n_outputs); } -bool jit_freeze_pause(JitBackend backend) { +int jit_freeze_pause(JitBackend backend) { lock_guard guard(state.lock); return jitc_freeze_pause(backend); } -bool jit_freeze_resume(JitBackend backend) { +int jit_freeze_resume(JitBackend backend) { lock_guard guard(state.lock); return jitc_freeze_resume(backend); } diff --git a/src/record_ts.cpp b/src/record_ts.cpp index 0607c787..01f9e65f 100644 --- a/src/record_ts.cpp +++ b/src/record_ts.cpp @@ -26,14 +26,14 @@ struct ReplayVariable { // Tracks the size in bytes, of this allocation size_t data_size = 0; uint32_t index; - RecordVarInit init; + RecordedVarInit init; - ReplayVariable(RecordVariable &rv) { + ReplayVariable(RecordedVariable &rv) { this->index = rv.index; this->init = rv.init; - if (init == RecordVarInit::Captured) { + if (init == RecordedVarInit::Captured) { // copy the variable, so that it isn't changed this->index = jitc_var_copy(this->index); @@ -90,7 +90,7 @@ 
struct ReplayVariable { } /** * Allocates the data for this replay variable. - * If this is attempted twice, we test weather the allocated size is + * If this is attempted twice, we test whether the allocated size is * sufficient and re-allocate the memory if necessary. */ void alloc(JitBackend backend, size_t dsize) { @@ -160,8 +160,8 @@ int Recording::replay(const uint32_t *replay_inputs, uint32_t *outputs) { replay_variables.clear(); - replay_variables.reserve(this->record_variables.size()); - for (RecordVariable &rv : this->record_variables) { + replay_variables.reserve(this->recorded_variables.size()); + for (RecordedVariable &rv : this->recorded_variables) { replay_variables.push_back(ReplayVariable(rv)); } @@ -474,9 +474,15 @@ int Recording::replay(const uint32_t *replay_inputs, uint32_t *outputs) { uint32_t size = in_rv.size(in_info.vtype); out_rv.alloc(backend, size, out_info.vtype); - if (dry_run) - jitc_fail( + if (dry_run){ + jitc_log( + LogLevel::Warn, "replay(): dry_run compress operation not supported!"); + // We return false (the dry run failed), which causes a + // re-recording of the frozen function. 
+ return false; + } + uint32_t out_size = ts->compress((uint8_t *) in_rv.data, size, (uint32_t *) out_rv.data); @@ -648,7 +654,7 @@ int Recording::replay(const uint32_t *replay_inputs, uint32_t *outputs) { " -> s%u is_pointer=%u offset=%u", param.slot, param.pointer_access, param.extra.offset); - if (rv.init == RecordVarInit::Captured) { + if (rv.init == RecordedVarInit::Captured) { jitc_log(LogLevel::Debug, " captured"); jitc_log(LogLevel::Debug, " label=%s", jitc_var_label(rv.index)); @@ -726,7 +732,7 @@ int Recording::replay(const uint32_t *replay_inputs, uint32_t *outputs) { uint32_t slot = info.slot; ReplayVariable &rv = replay_variables[slot]; - if (rv.init == RecordVarInit::Input) { + if (rv.init == RecordedVarInit::Input) { // Use input variable jitc_log(LogLevel::Debug, " output %u: from slot s%u = input[%u]", i, slot, @@ -736,7 +742,7 @@ int Recording::replay(const uint32_t *replay_inputs, uint32_t *outputs) { uint32_t var_index = replay_inputs[rv.index]; jitc_var_inc_ref(var_index); outputs[i] = var_index; - } else if (rv.init == RecordVarInit::Captured) { + } else if (rv.init == RecordedVarInit::Captured) { jitc_log(LogLevel::Debug, " output %u: from slot s%u = captured[%u]", i, slot, rv.index); @@ -766,10 +772,10 @@ int Recording::replay(const uint32_t *replay_inputs, uint32_t *outputs) { } for (ReplayVariable &rv : replay_variables) { - if (rv.init == RecordVarInit::Captured) { + if (rv.init == RecordedVarInit::Captured) { jitc_var_dec_ref(rv.index); rv.data = 0; - } else if (rv.init == RecordVarInit::None && rv.data) { + } else if (rv.init == RecordedVarInit::None && rv.data) { rv.free(); } } @@ -777,11 +783,319 @@ int Recording::replay(const uint32_t *replay_inputs, uint32_t *outputs) { return true; } +void RecordThreadState::barrier() { + if (!paused()) { + uint32_t start = this->m_recording.dependencies.size(); + + Operation op; + op.type = OpType::Barrier; + op.dependency_range = std::pair(start, start); + 
this->m_recording.operations.push_back(op); + } + + pause_scope pause(this); + return this->m_internal->barrier(); +} + +Task *RecordThreadState::launch(Kernel kernel, KernelKey *key, + XXH128_hash_t hash, uint32_t size, + std::vector *kernel_params, + const std::vector *kernel_param_ids) { + if (!paused()) { + try { + record_launch(kernel, key, hash, size, kernel_params, + kernel_param_ids); + } catch (...) { + record_exception(); + } + } + pause_scope pause(this); + return this->m_internal->launch(kernel, key, hash, size, kernel_params, + kernel_param_ids); +} + +/// Fill a device memory region with constants of a given type +void RecordThreadState::memset_async(void *ptr, uint32_t size, uint32_t isize, + const void *src) { + if (!paused()) { + try { + record_memset_async(ptr, size, isize, src); + } catch (...) { + record_exception(); + } + } + pause_scope pause(this); + return this->m_internal->memset_async(ptr, size, isize, src); +} + +/// Mask compression +uint32_t RecordThreadState::compress(const uint8_t *in, uint32_t size, + uint32_t *out) { + if (!paused()) { + try { + record_compress(in, size, out); + } catch (...) { + record_exception(); + } + } + pause_scope pause(this); + return this->m_internal->compress(in, size, out); +} + +/// Compute a permutation to reorder an integer array into discrete groups +uint32_t RecordThreadState::mkperm(const uint32_t *values, uint32_t size, + uint32_t bucket_count, uint32_t *perm, + uint32_t *offsets) { + if (!paused()) { + try { + record_mkperm(values, size, bucket_count, perm, offsets); + } catch (...) 
{ + record_exception(); + } + } + pause_scope pause(this); + return this->m_internal->mkperm(values, size, bucket_count, perm, offsets); +} + +/// Perform a synchronous copy operation +void RecordThreadState::memcpy(void *dst, const void *src, size_t size) { + jitc_log(LogLevel::Debug, "record(): memcpy(dst=%p, src=%p, size=%zu)", dst, + src, size); + pause_scope pause(this); + return this->m_internal->memcpy(dst, src, size); +} + +/// Perform an asynchronous copy operation +void RecordThreadState::memcpy_async(void *dst, const void *src, size_t size) { + jitc_log(LogLevel::Debug, + "record(): memcpy_async(dst=%p, src=%p, size=%zu)", dst, src, + size); + bool has_var = has_variable(src); + if (!paused() && has_var) { + + uint32_t src_id; + src_id = this->get_variable(src); + + RecordedVariable rv; + rv.last_memcpy = this->m_recording.operations.size() + 1; + uint32_t dst_id = this->add_variable(dst, rv); + + uint32_t start = this->m_recording.dependencies.size(); + add_in_param(src_id); + add_out_param(dst_id, + this->m_recording.recorded_variables[src_id].type); + uint32_t end = this->m_recording.dependencies.size(); + + Operation op; + op.type = OpType::MemcpyAsync; + op.dependency_range = std::pair(start, end); + op.size = size; + this->m_recording.operations.push_back(op); + } + { + pause_scope pause(this); + this->m_internal->memcpy_async(dst, src, size); + } + if (!paused() && !has_var) { + // If we did not know the source variable, this memcpy might be + // coming from \c jitc_call_upload. + // If that is the case, we have to capture the offset buffer. + // Since the pointer might be used, for example by an aggregate call + // (nested calls), we have to overwrite the RecordedVariable. 
+ CallData *call = nullptr; + for (CallData *tmp : calls_assembled) { + if (tmp->offset == dst) { + call = tmp; + break; + } + } + if (call) { + capture_call_offset(dst, size); + jitc_log(LogLevel::Debug, " captured call offset"); + } else { + jitc_raise("record(): Tried to record a memcpy_async operation, " + "but the source pointer %p was not known.", + src); + } + } +} + +/// Sum over elements within blocks +void RecordThreadState::block_reduce(VarType vt, ReduceOp op, uint32_t size, + uint32_t block_size, const void *in, + void *out) { + if (!paused()) { + try { + record_block_reduce(vt, op, size, block_size, in, out); + } catch (...) { + record_exception(); + } + } + pause_scope pause(this); + return this->m_internal->block_reduce(vt, op, size, block_size, in, out); +} + +void RecordThreadState::block_prefix_reduce(VarType vt, ReduceOp op, + uint32_t size, uint32_t block_size, + bool exclusive, bool reverse, + const void *in, void *out) { + if (!paused()) { + try { + record_block_prefix_reduce(vt, op, size, block_size, exclusive, + reverse, in, out); + } catch (...) 
{ + record_exception(); + } + } + pause_scope pause(this); + return this->m_internal->block_prefix_reduce(vt, op, size, block_size, + exclusive, reverse, in, out); +} + +/// Compute a dot product of two equal-sized arrays +void RecordThreadState::reduce_dot(VarType type, const void *ptr_1, + const void *ptr_2, uint32_t size, + void *out) { + jitc_raise("RecordThreadState::reduce_dot(): unsupported function recording!"); + pause_scope pause(this); + return this->m_internal->reduce_dot(type, ptr_1, ptr_2, size, out); +} + +/// Asynchronously update a single element in memory +void RecordThreadState::poke(void *dst, const void *src, uint32_t size) { + jitc_raise("RecordThreadState::poke(): unsupported function recording!"); + pause_scope pause(this); + return this->m_internal->poke(dst, src, size); +} + +void RecordThreadState::aggregate(void *dst, AggregationEntry *agg, + uint32_t size) { + if (!paused()) { + try { + record_aggregate(dst, agg, size); + } catch (...) { + record_exception(); + } + } + pause_scope pause(this); + this->m_internal->aggregate(dst, agg, size); +} + +// Enqueue a function to be run on the host once backend computation is done +void RecordThreadState::enqueue_host_func(void (*callback)(void *), + void *payload) { + pause_scope pause(this); + return this->m_internal->enqueue_host_func(callback, payload); +} + +/// LLVM: reduce a variable that was previously expanded due to +/// dr.ReduceOp.Expand +void RecordThreadState::reduce_expanded(VarType vt, ReduceOp reduce_op, + void *data, uint32_t exp, + uint32_t size) { + if (!paused()) { + try { + record_reduce_expanded(vt, reduce_op, data, exp, size); + } catch (...) { + record_exception(); + } + } + pause_scope pause(this); + return this->m_internal->reduce_expanded(vt, reduce_op, data, exp, size); +} + +/** + * This function is called every time a pointer is freed using \ref + * jitc_free. It records the operation and removes the mapping from that + * pointer to the recorded variable. 
+ * If the pointer is reused later by another call to \ref jitc_malloc, the + * \ref RecordThreadState.add_variable function will create a new variable + * and mapping from the pointer to it. + */ +void RecordThreadState::notify_free(const void *ptr) { + if (has_variable(ptr)) { + jitc_log(LogLevel::Debug, "record(): jitc_free(ptr=%p)", ptr); + + uint32_t start = this->m_recording.dependencies.size(); + add_in_param(ptr, VarType::Void, false); + uint32_t end = this->m_recording.dependencies.size(); + + Operation op; + op.type = OpType::Free; + op.dependency_range = std::pair(start, end); + + this->ptr_to_slot.erase(ptr); + } +} + +/** + * Adds an input of the recording. + * This adds the slot of that variable to the \ref Recording.inputs + * vector. + */ +void RecordThreadState::add_input(uint32_t input) { + try { + uint32_t input_index = this->m_recording.inputs.size(); + Variable *v = jitc_var(input); + RecordedVariable rv; + rv.state = RecordedVarState::Input; + rv.init = RecordedVarInit::Input; + rv.index = input_index; + rv.type = (VarType) v->type; + uint32_t slot = this->add_variable(v->data, rv); + jitc_log(LogLevel::Debug, + "record(): Adding variable %u <%p> input %u to slot s%u", + input, v->data, input_index, slot); + this->m_recording.inputs.push_back(slot); + } catch (...) { + record_exception(); + } +} +/** + * Adds an output to the recording. + * The output can be seen as a final operation, which also has to infer the + * size of its input variables. + * Therefore, we record the full \ref ParamInfo for each output variable. 
+ */ +void RecordThreadState::add_output(uint32_t output) { + try { + uint32_t output_index = this->m_recording.outputs.size(); + Variable *v = jitc_var(output); + uint32_t slot; + if (!has_variable(v->data)) { + slot = capture_variable(output); + } else { + slot = this->get_variable(v->data); + } + + jitc_log(LogLevel::Trace, + "record(): Adding variable %u output %u to slot s%u", output, + output_index, slot); + ParamInfo info; + info.slot = slot; + info.vtype = (VarType) v->type; + this->m_recording.outputs.push_back(info); + } catch (...) { + record_exception(); + } +} + +bool RecordThreadState::pause() { + bool tmp = m_paused; + m_paused = true; + return tmp; +} +bool RecordThreadState::resume() { + bool tmp = m_paused; + m_paused = false; + return tmp; +} + void RecordThreadState::record_expand(uint32_t index) { Variable *v = jitc_var(index); uint32_t dst_slot = get_variable(v->data); - const RecordVariable &rv = this->m_recording.record_variables[dst_slot]; + const RecordedVariable &rv = this->m_recording.recorded_variables[dst_slot]; if (rv.last_memset == 0) jitc_fail("record(): Could not infer last memset operation of r%u s%u, " "to construct expand operation!", @@ -863,8 +1177,13 @@ void RecordThreadState::record_launch( } } +#ifdef NDEBUG jitc_log(LogLevel::Info, "record(): recording kernel %u %016llx", this->m_recording.n_kernels++, (unsigned long long) hash.high64); +#else + jitc_log(LogLevel::Info, "record(): recording kernel %016llx", + (unsigned long long) hash.high64); +#endif uint32_t start = this->m_recording.dependencies.size(); for (uint32_t param_index = 0; param_index < kernel_param_ids->size(); @@ -918,7 +1237,7 @@ void RecordThreadState::record_launch( } } else if (param_type == ParamType::Output) { - RecordVariable rv; + RecordedVariable rv; slot = this->add_variable(ptr, rv); } else jitc_fail("Parameter Type not supported!"); @@ -970,7 +1289,7 @@ void RecordThreadState::record_launch( if (uses_optix) { op.uses_optix = true; - 
scoped_pause(); + pause_scope pause(this); // Copy SBT op.sbt = new OptixShaderBindingTable(); std::memcpy(op.sbt, this->optix_sbt, sizeof(OptixShaderBindingTable)); @@ -1043,7 +1362,7 @@ void RecordThreadState::record_memset_async(void *ptr, uint32_t size, "only isize<=8 is supported!", isize); - RecordVariable rv; + RecordedVariable rv; rv.last_memset = this->m_recording.operations.size() + 1; uint32_t ptr_id = this->add_variable(ptr, rv); @@ -1158,7 +1477,7 @@ void RecordThreadState::record_aggregate(void *dst, AggregationEntry *agg, jitc_log(LogLevel::Debug, "record(): aggregate(dst=%p, size=%u)", dst, size); - uint32_t dst_id = this->add_variable(dst, RecordVariable{}); + uint32_t dst_id = this->add_variable(dst, RecordedVariable{}); jitc_log(LogLevel::Debug, " <- s%u", dst_id); @@ -1201,7 +1520,7 @@ void RecordThreadState::record_aggregate(void *dst, AggregationEntry *agg, // uploaded. // We therefore defer the offset buffer capture to the // memcpy_async operation. - uint32_t slot = add_variable(p.src, RecordVariable()); + uint32_t slot = add_variable(p.src, RecordedVariable()); jitc_log(LogLevel::Debug, " var at slot s%u", slot); @@ -1246,7 +1565,7 @@ void RecordThreadState::record_reduce_expanded(VarType vt, ReduceOp reduce_op, "size=%u)", (uint32_t) vt, (uint32_t) reduce_op, data, exp, size); - uint32_t data_id = this->add_variable(data, RecordVariable{}); + uint32_t data_id = this->add_variable(data, RecordedVariable{}); uint32_t start = this->m_recording.dependencies.size(); add_out_param(data_id, vt); @@ -1263,12 +1582,18 @@ void RecordThreadState::record_reduce_expanded(VarType vt, ReduceOp reduce_op, } void Recording::validate() { - for (uint32_t i = 0; i < this->record_variables.size(); i++) { - RecordVariable &rv = this->record_variables[i]; - if (rv.state == RecordVarState::Uninit) { + for (uint32_t i = 0; i < this->recorded_variables.size(); i++) { + RecordedVariable &rv = this->recorded_variables[i]; + if (rv.state == 
RecordedVarState::Uninitialized) { +#ifdef NDEBUG jitc_fail("record(): Variable at slot s%u %p was left in an " "uninitialized state!", i, rv.ptr); +#else + jitc_fail("record(): Variable at slot s%u was left in an " + "uninitialized state!", + i); +#endif } } } @@ -1361,6 +1686,206 @@ Recording *jitc_freeze_stop(JitBackend backend, const uint32_t *outputs, } } +/** + * This captures the offset buffer of a vcall in a kernel. + * The offset buffer describes where in the data buffer of that vcall the + * variables or pointers to variables, for that vcall are stored. + * It should not change between invocations and we should therefore be able + * to capture it and reuse it when replaying the kernel. + */ +uint32_t RecordThreadState::capture_call_offset(const void *ptr, size_t dsize) { + uint32_t size = dsize / type_size[(uint32_t) VarType::UInt64]; + + AllocType atype = + backend == JitBackend::CUDA ? AllocType::Device : AllocType::HostAsync; + uint64_t *data = (uint64_t *) jitc_malloc(atype, dsize); + jitc_memcpy(backend, data, ptr, dsize); + + uint32_t data_var = + jitc_var_mem_map(backend, VarType::UInt64, data, size, true); + + RecordedVariable rv; +#ifdef NDEBUG + rv.ptr = ptr; +#endif + rv.state = RecordedVarState::Captured; + rv.init = RecordedVarInit::Captured; + rv.index = data_var; + + uint32_t slot; + auto it = this->ptr_to_slot.find(ptr); + if (it == this->ptr_to_slot.end()) { + slot = this->m_recording.recorded_variables.size(); + + this->m_recording.recorded_variables.push_back(rv); + + this->ptr_to_slot.insert({ ptr, slot }); + } else { + slot = it.value(); + RecordedVariable &old = this->m_recording.recorded_variables[slot]; + if (old.init != RecordedVarInit::None) + jitc_fail("record(): Tried to overwrite an initialized variable " + "with an offset buffer!"); + + this->m_recording.recorded_variables[slot] = rv; + } + + return slot; +} + +/** + * This function tries to capture a variable that is not known to the + * recording \c ThreadState. 
+ * This is unsupported for now and raises an exception. + */ +uint32_t RecordThreadState::capture_variable(uint32_t index, + const void * /*ptr*/, + bool /*remember*/, bool test_scope, + bool /*overwrite*/) { + + pause_scope pause(this); + Variable *v = jitc_var(index); + if (v->scope < this->m_internal->scope && test_scope) { + jitc_raise("record(): Variable r%u[%u] -> %p, label=%s, was created " + "before recording was started, but it was " + "not specified as an input variable! This can happen if a " + "input type is not fully traversable, for example when not " + "specifying a member in DRJIT_STRUCT, but using it in the " + "frozen function.", + index, v->size, v->data, jitc_var_label(index)); + } + + jitc_raise("record(): Variable r%u[%u] -> %p, label=%s, was created while " + "recording, but it was not created by a supported operation. " + "This can happen if a memory region was created outside of " + "Dr.Jit but mapped to a Dr.Jit variable.", + index, v->size, v->data, jitc_var_label(index)); + + return 0; +} + +/** + * Add information about a variable, deduplicating it and returning the slot + * in the `variables` field of the recording. + * Information is combined when the variable has already been added. + * This is used by the input variables of a kernel. + */ +uint32_t RecordThreadState::add_variable(const void *ptr, RecordedVariable rv) { +#ifdef NDEBUG + rv.ptr = ptr; +#endif + auto it = this->ptr_to_slot.find(ptr); + + if (it == this->ptr_to_slot.end()) { + uint32_t slot = this->m_recording.recorded_variables.size(); + + this->m_recording.recorded_variables.push_back(rv); + + this->ptr_to_slot.insert({ ptr, slot }); + + return slot; + } else { + uint32_t slot = it.value(); + + this->m_recording.recorded_variables[slot] |= rv; + + return slot; + } +} + +/// Return the slot index given the data pointer of a variable. +/// This fails if the variable has not been previously added. 
+uint32_t RecordThreadState::get_variable(const void *ptr) { + auto it = this->ptr_to_slot.find(ptr); + + if (it == this->ptr_to_slot.end()) + jitc_fail("Failed to find the slot corresponding to the variable " + "with data at %p", + ptr); + + return it.value(); +} + +/// Test if the ThreadState knows this \c ptr +bool RecordThreadState::has_variable(const void *ptr) { + auto it = this->ptr_to_slot.find(ptr); + + return it != this->ptr_to_slot.end(); +} + +/** + * Adds a parameter access to the \ref dependencies vector. + * This also modifies the state of the \ref RecordedVariable that was + * accessed. + */ +void RecordThreadState::add_param(ParamInfo info) { + RecordedVariable &rv = this->m_recording.recorded_variables[info.slot]; + if (info.type == ParamType::Output) { + + jitc_log(LogLevel::Debug, " <- param s%u", info.slot); + + if (info.vtype != VarType::Void) + rv.type = info.vtype; + + rv.state = RecordedVarState::OpOutput; + + } else if (info.type == ParamType::Input) { + + jitc_log(LogLevel::Debug, " -> param s%u", info.slot); + + if (info.test_uninit && rv.state == RecordedVarState::Uninitialized) + jitc_raise("record(): Varaible at slot s%u was read from by " + "operation o%u, but has not yet been initialized! " + "This can occur if the variable was not part of " + "the input but is used by a recorded operation, for " + "example if it was not specified as a member in a " + "DRJIT_STRUCT but used in the frozen function.", + info.slot, + (uint32_t) this->m_recording.operations.size()); + + if (info.vtype == VarType::Void) + info.vtype = rv.type; + } + + this->m_recording.dependencies.push_back(info); +} +/// Helper function for recording input parameters given the slot. 
+void RecordThreadState::add_in_param(uint32_t slot, VarType vtype, + bool test_uninit) { + ParamInfo info; + info.type = ParamType::Input; + info.slot = slot; + info.test_uninit = test_uninit; + info.vtype = vtype; + add_param(info); +} +/// Helper function recording input access given the pointer. +void RecordThreadState::add_in_param(const void *ptr, VarType vtype, + bool test_uninit) { + uint32_t slot = this->get_variable(ptr); + add_in_param(slot, vtype, test_uninit); +} +/// Helper function recording an output access, given the slot and \ref VarType +void RecordThreadState::add_out_param(uint32_t slot, VarType vtype) { + ParamInfo info; + info.type = ParamType::Output; + info.slot = slot; + info.vtype = vtype; + add_param(info); +} +/// Helper function recording an output access, given the pointer and \ref +/// VarType +void RecordThreadState::add_out_param(const void *ptr, VarType vtype) { + RecordedVariable rv; + uint32_t slot = this->add_variable(ptr, rv); + add_out_param(slot, vtype); +} +/// Helper function recording an output access, given the slot and the +/// uint32_t representation of a \ref VarType +void RecordThreadState::add_out_param(uint32_t slot, uint32_t vtype) { + add_out_param(slot, (VarType) vtype); +} + void jitc_freeze_abort(JitBackend backend) { if (RecordThreadState *rts = dynamic_cast(thread_state(backend)); @@ -1387,15 +1912,15 @@ void jitc_freeze_abort(JitBackend backend) { } void jitc_freeze_destroy(Recording *recording) { - for (RecordVariable &rv : recording->record_variables) { - if (rv.init == RecordVarInit::Captured) { + for (RecordedVariable &rv : recording->recorded_variables) { + if (rv.init == RecordedVarInit::Captured) { jitc_var_dec_ref(rv.index); } } delete recording; } -bool jitc_freeze_pause(JitBackend backend) { +int jitc_freeze_pause(JitBackend backend) { if (RecordThreadState *rts = dynamic_cast(thread_state(backend)); @@ -1403,22 +1928,22 @@ bool jitc_freeze_pause(JitBackend backend) { return rts->pause(); } 
else { jitc_fail( - "jit_record_stop(): Tried to pause recording a thread state " + "jit_freeze_pause(): Tried to pause recording a thread state " "for backend %u, while no recording was started for this backend. " - "Try to start the recording with jit_record_start.", + "Try to start the recording with jit_freeze_start.", (uint32_t) backend); } } -bool jitc_freeze_resume(JitBackend backend) { +int jitc_freeze_resume(JitBackend backend) { if (RecordThreadState *rts = dynamic_cast(thread_state(backend)); rts != nullptr) { return rts->resume(); } else { jitc_fail( - "jit_record_stop(): Tried to resume recording a thread state " + "jit_freeze_resume(): Tried to resume recording a thread state " "for backend %u, while no recording was started for this backend. " - "Try to start the recording with jit_record_start.", + "Try to start the recording with jit_freeze_start.", (uint32_t) backend); } } diff --git a/src/record_ts.h b/src/record_ts.h index 478bf8c8..0f1f100c 100644 --- a/src/record_ts.h +++ b/src/record_ts.h @@ -9,6 +9,26 @@ #include "var.h" #include +void jitc_freeze_start(JitBackend backend, const uint32_t *inputs, + uint32_t n_inputs); + +Recording *jitc_freeze_stop(JitBackend backend, const uint32_t *outputs, + uint32_t n_outputs); + +void jitc_freeze_abort(JitBackend backend); + +void jitc_freeze_destroy(Recording *recording); + +int jitc_freeze_pause(JitBackend backend); + +int jitc_freeze_resume(JitBackend backend); + +void jitc_freeze_replay(Recording *recording, const uint32_t *inputs, + uint32_t *outputs); + +int jitc_freeze_dry_run(Recording *recording, const uint32_t *inputs, + uint32_t *outputs); + /// HashMap, mapping an allocation to a recorded variable using PtrToSlot = tsl::robin_map; @@ -63,11 +83,12 @@ struct Operation { struct { /// The reduce type of a prefix reduction operation ReduceOp rtype; - /// Weather a prefix sum operation is exclusive + /// Whether a prefix sum operation is exclusive bool exclusive; bool reverse; } prefix_reduce; 
/// The bucket count for the mkperm operation + /// The function has to be re-recorded when the bucket count changes. uint32_t bucket_count; /// Additional data such as the source of memset uint64_t data; @@ -77,13 +98,15 @@ struct Operation { /// Records the size of the largest input variable (directly accessed or /// through a pointer if the kernel has no direct inputs). size_t input_size = 0; - /// Weather this operation is enabled. We might have to disable some + /// Whether this operation is enabled. We might have to disable some /// operations after the fact, and removing them from the Recording would be /// more complicated. bool enabled = true; /// Does this operation use optix? bool uses_optix = false; - /// A copy of the shader binding table, used by the kernel. + /// A copy of the shader binding table including a deepcopy of its hit- and + /// miss- groups, used by the kernel. The callables are filled in by the \c + /// CUDAThreadState::launch function. OptixShaderBindingTable *sbt; }; @@ -91,20 +114,24 @@ struct Operation { /// /// Output variables are only tracked through the outputs array, as this /// information is only needed when constructing the output variables. -/// -enum class RecordVarState { +enum class RecordedVarState { /// This variable was not initialized - Uninit, + Uninitialized, /// This variable has been created by an operation OpOutput, /// This variable is part of the function input Input, - /// This variable has been captured + /// This variable has been captured i.e. it is copied and part of the + /// recording. + /// For example, the offset buffer of a vcall should not change between + /// recording and replay and can be copied. + /// Captured variables are immutable and copied when replaying, so that they + /// are not changed by the replaying kernels. 
Captured, }; /// Records how this variable was initialized -enum class RecordVarInit { +enum class RecordedVarInit { None, Captured, Input, @@ -116,37 +143,39 @@ enum class RecordVarInit { * An evaluated variable is tracked by the memory it refers to. * This struct records the memory region from the time it was first used in one * of the operations, until it is freed by `jit_free`. - * The memory also has to be allocated using `jit_malloc`, otherwise it cannot + * The memory must have been allocated using `jit_malloc`, otherwise it cannot * be tracked. */ -struct RecordVariable { +struct RecordedVariable { /// Stores index into input array if variable is input or index of captured /// variable uint32_t index = 0; /// Records how this variable has been initialized - RecordVarInit init = RecordVarInit::None; + RecordedVarInit init = RecordedVarInit::None; - /// Tracks the last memset and memcpy operations necessary for recording the - /// expand operation. + /// Tracks the last memset and memcpy operations by indexing into the + /// operations list, necessary for recording the expand operation. uint32_t last_memset = 0; uint32_t last_memcpy = 0; /// Tracks the current state of a variable - RecordVarState state = RecordVarState::Uninit; + RecordedVarState state = RecordedVarState::Uninitialized; /// Tracks the current type of the variable VarType type = VarType::Void; +#ifdef NDEBUG /// Tracks the pointer of the variable for debug purposes const void *ptr; +#endif - RecordVariable() {} + RecordedVariable() {} /** * Not all information about variables might be known right away (see * memcpy). When new information about the variable is available, we can add * it to the already saved RecordVariable. 
*/ - RecordVariable &operator|=(const RecordVariable &rhs) { - if (this->state == RecordVarState::Uninit) { + RecordedVariable &operator|=(const RecordedVariable &rhs) { + if (this->state == RecordedVarState::Uninitialized) { this->state = rhs.state; this->index = rhs.index; } @@ -199,7 +228,7 @@ struct ParamInfo { * \brief Represents a frozen function recording. And can be used to replay it. */ struct Recording { - /// Weather this recording requires a dryrun, to discover the size of + /// Whether this recording requires a dryrun, to discover the size of /// certain operations. bool requires_dry_run = false; @@ -207,10 +236,10 @@ struct Recording { /// Each variable refers to an allocation. /// If an allocation reuses a memory region, it is referred to by a separate /// variable. - std::vector record_variables; + std::vector recorded_variables; - /// This vector maps the flat and deduplicated inputs to the frozen to their - /// variables in the \ref record_variables array. + /// This vector maps the flat and deduplicated inputs to the frozen function + /// to their variables in the \ref recorded_variables array. std::vector inputs; /// This vector maps the flat outputs of the frozen function to their /// recorded variables and how they have been accessed. @@ -233,8 +262,10 @@ struct Recording { /// function was recorded. int replay(const uint32_t *replay_input, uint32_t *outputs); +#ifdef NDEBUG /// Counter, counting the number of kernels for debugging. uint32_t n_kernels = 0; +#endif /// This function is called after recording and checks that the recording is /// valid i.e. that no variables where left uninitialized. @@ -250,7 +281,31 @@ struct Recording { * operations performed with it. */ struct RecordThreadState : ThreadState { + + /// The last exception thrown while recording. + /// This is required to re-throw the exception at the end. + std::exception_ptr m_exception = nullptr; + /// The internal ThreadState, that is wrapped by this RecordThreadState.
+ ThreadState *m_internal; + + /// The recording, produced when recording this ThreadState. + Recording m_recording; + +protected: + /// Indicates that recording has been paused. + bool m_paused = false; + + /// Mapping from data pointer of a variable to an index into the slot of the + /// recording. + PtrToSlot ptr_to_slot; + +public: + /** + * Constructs a new RecordThreadState, wrapping an internal ThreadState. + * This does not take ownership of the internal ThreadState + * and it has to be kept alive until the RecordThreadState is destroyed. + */ RecordThreadState(ThreadState *internal) { this->context = internal->context; this->stream = internal->stream; @@ -278,222 +333,55 @@ struct RecordThreadState : ThreadState { this->scope = internal->scope; }; - void barrier() override { - if (!paused()) { - uint32_t start = this->m_recording.dependencies.size(); - - Operation op; - op.type = OpType::Barrier; - op.dependency_range = std::pair(start, start); - this->m_recording.operations.push_back(op); - } - - scoped_pause(); - return this->m_internal->barrier(); - } + void barrier() override; Task *launch(Kernel kernel, KernelKey *key, XXH128_hash_t hash, uint32_t size, std::vector *kernel_params, - const std::vector *kernel_param_ids) override { - if (!paused()) { - try { - record_launch(kernel, key, hash, size, kernel_params, - kernel_param_ids); - } catch (...) { - record_exception(); - } - } - scoped_pause(); - return this->m_internal->launch(kernel, key, hash, size, kernel_params, - kernel_param_ids); - } + const std::vector *kernel_param_ids) override; /// Fill a device memory region with constants of a given type void memset_async(void *ptr, uint32_t size, uint32_t isize, - const void *src) override { - if (!paused()) { - try { - record_memset_async(ptr, size, isize, src); - } catch (...)
{ - record_exception(); - } - } - scoped_pause(); - return this->m_internal->memset_async(ptr, size, isize, src); - } + const void *src) override; /// Mask compression uint32_t compress(const uint8_t *in, uint32_t size, - uint32_t *out) override { - if (!paused()) { - try { - record_compress(in, size, out); - } catch (...) { - record_exception(); - } - } - scoped_pause(); - return this->m_internal->compress(in, size, out); - } + uint32_t *out) override; /// Compute a permutation to reorder an integer array into discrete groups uint32_t mkperm(const uint32_t *values, uint32_t size, uint32_t bucket_count, uint32_t *perm, - uint32_t *offsets) override { - if (!paused()) { - try { - record_mkperm(values, size, bucket_count, perm, offsets); - } catch (...) { - record_exception(); - } - } - scoped_pause(); - return this->m_internal->mkperm(values, size, bucket_count, perm, - offsets); - } + uint32_t *offsets) override; /// Perform a synchronous copy operation - void memcpy(void *dst, const void *src, size_t size) override { - jitc_log(LogLevel::Debug, "record(): memcpy(dst=%p, src=%p, size=%zu)", - dst, src, size); - scoped_pause(); - return this->m_internal->memcpy(dst, src, size); - } + void memcpy(void *dst, const void *src, size_t size) override; /// Perform an asynchronous copy operation - void memcpy_async(void *dst, const void *src, size_t size) override { - jitc_log(LogLevel::Debug, - "record(): memcpy_async(dst=%p, src=%p, size=%zu)", dst, src, - size); - bool has_var = has_variable(src); - if (!paused() && (has_var)) { - - uint32_t src_id; - src_id = this->get_variable(src); - - RecordVariable rv; - rv.last_memcpy = this->m_recording.operations.size() + 1; - uint32_t dst_id = this->add_variable(dst, rv); - - uint32_t start = this->m_recording.dependencies.size(); - add_in_param(src_id); - add_out_param(dst_id, - this->m_recording.record_variables[src_id].type); - uint32_t end = this->m_recording.dependencies.size(); - - Operation op; - op.type = 
OpType::MemcpyAsync; - op.dependency_range = std::pair(start, end); - op.size = size; - this->m_recording.operations.push_back(op); - } - { - scoped_pause(); - this->m_internal->memcpy_async(dst, src, size); - } - if (!paused() && !has_var) { - // If we did not know the source variable, this memcpy might be - // coming from a `jitc_call_upload` call. - // If that is the case, we have to capture the offset buffer. - // Since the pointer might be used, for example by an aggregate call - // (nested calls), we have to overwrite the RecordVariable. - // - CallData *call = nullptr; - for (CallData *tmp : calls_assembled) { - if (tmp->offset == dst) { - call = tmp; - break; - } - } - if (call) { - capture_call_offset(dst, size); - jitc_log(LogLevel::Debug, " captured call offset"); - } - } - } + void memcpy_async(void *dst, const void *src, size_t size) override; /// Sum over elements within blocks void block_reduce(VarType vt, ReduceOp op, uint32_t size, - uint32_t block_size, const void *in, void *out) override { - if (!paused()) { - try { - record_block_reduce(vt, op, size, block_size, in, out); - } catch (...) { - record_exception(); - } - } - scoped_pause(); - return this->m_internal->block_reduce(vt, op, size, block_size, in, - out); - } + uint32_t block_size, const void *in, void *out) override; void block_prefix_reduce(VarType vt, ReduceOp op, uint32_t size, uint32_t block_size, bool exclusive, bool reverse, - const void *in, void *out) override { - if (!paused()) { - try { - record_block_prefix_reduce(vt, op, size, block_size, exclusive, - reverse, in, out); - } catch (...) 
{ - record_exception(); - } - } - scoped_pause(); - return this->m_internal->block_prefix_reduce( - vt, op, size, block_size, exclusive, reverse, in, out); - } + const void *in, void *out) override; /// Compute a dot product of two equal-sized arrays void reduce_dot(VarType type, const void *ptr_1, const void *ptr_2, - uint32_t size, void *out) override { - jitc_log( - LogLevel::Warn, - "RecordThreadState::reduce_dot(): unsupported function recording!"); - scoped_pause(); - return this->m_internal->reduce_dot(type, ptr_1, ptr_2, size, out); - } + uint32_t size, void *out) override; /// Asynchronously update a single element in memory - void poke(void *dst, const void *src, uint32_t size) override { - jitc_log(LogLevel::Warn, - "RecordThreadState::poke(): unsupported function recording!"); - scoped_pause(); - return this->m_internal->poke(dst, src, size); - } + void poke(void *dst, const void *src, uint32_t size) override; - void aggregate(void *dst, AggregationEntry *agg, uint32_t size) override { - if (!paused()) { - try { - record_aggregate(dst, agg, size); - } catch (...) { - record_exception(); - } - } - scoped_pause(); - this->m_internal->aggregate(dst, agg, size); - } + void aggregate(void *dst, AggregationEntry *agg, uint32_t size) override; // Enqueue a function to be run on the host once backend computation is done - void enqueue_host_func(void (*callback)(void *), void *payload) override { - scoped_pause(); - return this->m_internal->enqueue_host_func(callback, payload); - } + void enqueue_host_func(void (*callback)(void *), void *payload) override; /// LLVM: reduce a variable that was previously expanded due to /// dr.ReduceOp.Expand void reduce_expanded(VarType vt, ReduceOp reduce_op, void *data, - uint32_t exp, uint32_t size) override { - if (!paused()) { - try { - record_reduce_expanded(vt, reduce_op, data, exp, size); - } catch (...) 
{ - record_exception(); - } - } - scoped_pause(); - return this->m_internal->reduce_expanded(vt, reduce_op, data, exp, - size); - } + uint32_t exp, uint32_t size) override; /** * This function is called every time a pointer is freed using \ref @@ -503,21 +391,7 @@ struct RecordThreadState : ThreadState { * \ref RecordThreadState.add_variable function will create a new variable * and mapping from the pointer to it. */ - void notify_free(const void *ptr) override { - if (has_variable(ptr)) { - jitc_log(LogLevel::Debug, "record(): jitc_free(ptr=%p)", ptr); - - uint32_t start = this->m_recording.dependencies.size(); - add_in_param(ptr, VarType::Void, false); - uint32_t end = this->m_recording.dependencies.size(); - - Operation op; - op.type = OpType::Free; - op.dependency_range = std::pair(start, end); - - this->ptr_to_slot.erase(ptr); - } - } + void notify_free(const void *ptr) override; ~RecordThreadState() {} @@ -526,63 +400,17 @@ struct RecordThreadState : ThreadState { * This is adds the slot of that variable to the \ref Recording.inputs * vector. */ - void add_input(uint32_t input) { - try { - uint32_t input_index = this->m_recording.inputs.size(); - Variable *v = jitc_var(input); - RecordVariable rv; - rv.state = RecordVarState::Input; - rv.init = RecordVarInit::Input; - rv.index = input_index; - rv.type = (VarType) v->type; - uint32_t slot = this->add_variable(v->data, rv); - jitc_log(LogLevel::Info, - "record(): Adding variable %u <%p> input %u to slot s%u", - input, v->data, input_index, slot); - this->m_recording.inputs.push_back(slot); - } catch (...) { - record_exception(); - } - } + void add_input(uint32_t input); /** * Adds an output to the recording. * The output can be seen as a final operation, which also has to infer the - * size of it's input variables. + * size of its input variables. * Therefore, we record the full \ref ParamInfo for each output variable. 
*/ - void add_output(uint32_t output) { - try { - uint32_t output_index = this->m_recording.outputs.size(); - Variable *v = jitc_var(output); - uint32_t slot; - if (!has_variable(v->data)) { - slot = capture_variable(output); - } else { - slot = this->get_variable(v->data); - } - - jitc_log(LogLevel::Trace, - "record(): Adding variable %u output %u to slot s%u", - output, output_index, slot); - ParamInfo info; - info.slot = slot; - info.vtype = (VarType) v->type; - this->m_recording.outputs.push_back(info); - } catch (...) { - record_exception(); - } - } + void add_output(uint32_t output); - bool pause() { - bool tmp = m_paused; - m_paused = true; - return tmp; - } - bool resume() { - bool tmp = m_paused; - m_paused = false; - return tmp; - } + bool pause(); + bool resume(); /// A helper scope, pausing recording. struct pause_scope { @@ -593,8 +421,6 @@ struct RecordThreadState : ThreadState { ~pause_scope() { rts->m_paused = tmp; } }; - pause_scope scoped_pause() { return pause_scope(this); } - /// Is recording paused or has an exception been thrown? /// Recording any operation should be gated by this function. inline bool paused() { return m_paused || m_exception; } @@ -606,19 +432,6 @@ struct RecordThreadState : ThreadState { m_exception = std::current_exception(); } - bool m_paused = false; - - std::exception_ptr m_exception = nullptr; - - ThreadState *m_internal; - - Recording m_recording; - -private: - // Mapping from data pointer of a variable to a index into the slot of the - // recording. - PtrToSlot ptr_to_slot; - /** * Record the Expand operation, corresponding to the `jitc_var_expand` call, * with which the variable `index` has been expanded. @@ -661,75 +474,24 @@ struct RecordThreadState : ThreadState { * variables or pointers to variables, for that vcall are stored. * It should not change between invocations and we should therefore be able * to capture it and reuse it when replaying the kernel. 
+ * + * \param ptr + * the pointer to the offset buffer + * + * \param dsize + * the size in bytes of the offset buffer + * */ - uint32_t capture_call_offset(const void *ptr, size_t dsize) { - uint32_t size; - size = dsize / type_size[(uint32_t) VarType::UInt64]; - - AllocType atype = backend == JitBackend::CUDA ? AllocType::Device - : AllocType::HostAsync; - uint64_t *data = (uint64_t *) jitc_malloc(atype, dsize); - jitc_memcpy(backend, data, ptr, dsize); - - uint32_t data_var = - jitc_var_mem_map(backend, VarType::UInt64, data, size, true); - - RecordVariable rv; - rv.ptr = ptr; - rv.state = RecordVarState::Captured; - rv.init = RecordVarInit::Captured; - rv.index = data_var; - - uint32_t slot; - auto it = this->ptr_to_slot.find(ptr); - if (it == this->ptr_to_slot.end()) { - slot = this->m_recording.record_variables.size(); - - this->m_recording.record_variables.push_back(rv); - - this->ptr_to_slot.insert({ ptr, slot }); - } else { - slot = it.value(); - RecordVariable &old = this->m_recording.record_variables[slot]; - if (old.init != RecordVarInit::None) - jitc_fail("record(): Tried to overwrite a initialized variable " - "with an offset buffer!"); - - this->m_recording.record_variables[slot] = rv; - } - - return slot; - } + uint32_t capture_call_offset(const void *ptr, size_t dsize); /** * This function tries to capture a variable that is not known to the * recording \c ThreadState. * This is unsupported for now and raises an exception. */ - uint32_t capture_variable(uint32_t index, const void */*ptr*/ = nullptr, + uint32_t capture_variable(uint32_t index, const void * /*ptr*/ = nullptr, bool /*remember*/ = true, bool test_scope = true, - bool /*overwrite*/ = false) { - - scoped_pause(); - Variable *v = jitc_var(index); - if (v->scope < this->m_internal->scope && test_scope) { - jitc_raise( - "record(): Variable r%u[%u] -> %p, label=%s, was created " - "before recording was started, but it was " - "not speciefied as an input variable! 
This can happen if a " - "input type is not fully traversavle, for example when not " - "specifying a member in DRJIT_STRUCT, but using it in the " - "frozen function.", - index, v->size, v->data, jitc_var_label(index)); - } - - jitc_raise("record(): Variable r%u[%u] -> %p, label=%s, data=%s, of " - "size > 1 was created while recording.", - index, v->size, v->data, jitc_var_label(index), - jitc_var_str(index)); - - return 0; - } + bool /*overwrite*/ = false); /** * Add information about a variable, deduplicating it and returning the slot @@ -737,137 +499,32 @@ struct RecordThreadState : ThreadState { * Information is combined when the variable has already been added. * This is used by the input variables of a kernel. */ - uint32_t add_variable(const void *ptr, RecordVariable rv) { - - rv.ptr = ptr; - auto it = this->ptr_to_slot.find(ptr); - - if (it == this->ptr_to_slot.end()) { - uint32_t slot = this->m_recording.record_variables.size(); - - this->m_recording.record_variables.push_back(rv); - - this->ptr_to_slot.insert({ ptr, slot }); - - return slot; - } else { - uint32_t slot = it.value(); - - this->m_recording.record_variables[slot] |= rv; - - return slot; - } - } + uint32_t add_variable(const void *ptr, RecordedVariable rv); /// Return the slot index given the data pointer of a variable. - /// This fails if the variable has not been added. - uint32_t get_variable(const void *ptr) { - auto it = this->ptr_to_slot.find(ptr); - - if (it == this->ptr_to_slot.end()) - jitc_fail("Failed to find the slot corresponding to the variable " - "with data at %p", - ptr); - - return it.value(); - } + /// This fails if the variable has not been previously added. + uint32_t get_variable(const void *ptr); /// Test if the ThreadState knows this \c ptr - bool has_variable(const void *ptr) { - auto it = this->ptr_to_slot.find(ptr); - - return it != this->ptr_to_slot.end(); - } + bool has_variable(const void *ptr); /** * Adds a parameter access to the \ref dependencies vector. 
* This also modifies the state of the \ref RecordVariable that was * accessed. */ - void add_param(ParamInfo info) { - RecordVariable &rv = this->m_recording.record_variables[info.slot]; - if (info.type == ParamType::Output) { - - jitc_log(LogLevel::Debug, " <- param s%u", info.slot); - - if (info.vtype != VarType::Void) - rv.type = info.vtype; - - rv.state = RecordVarState::OpOutput; - - } else if (info.type == ParamType::Input) { - - jitc_log(LogLevel::Debug, " -> param s%u", info.slot); - - if (info.test_uninit && rv.state == RecordVarState::Uninit) - jitc_raise("record(): Varaible at slot s%u was read from by " - "operation o%u, but has not yet been initialized! " - "This can happen if the variable was not part of " - "the input but is used by an recorded operation.", - info.slot, - (uint32_t) this->m_recording.operations.size()); - - if (info.vtype == VarType::Void) - info.vtype = rv.type; - } - - this->m_recording.dependencies.push_back(info); - } + void add_param(ParamInfo info); /// Helper function for recording input parameters given the slot. void add_in_param(uint32_t slot, VarType vtype = VarType::Void, - bool test_uninit = true) { - ParamInfo info; - info.type = ParamType::Input; - info.slot = slot; - info.test_uninit = test_uninit; - info.vtype = vtype; - add_param(info); - } + bool test_uninit = true); /// Helper function recording input access given the pointer. 
void add_in_param(const void *ptr, VarType vtype = VarType::Void, - bool test_uninit = true) { - uint32_t slot = this->get_variable(ptr); - add_in_param(slot, vtype, test_uninit); - } - /// Helper function recording an output access, given the slot and \ref - /// VarType - void add_out_param(uint32_t slot, VarType vtype) { - ParamInfo info; - info.type = ParamType::Output; - info.slot = slot; - info.vtype = vtype; - add_param(info); - } - /// Helper function recording an output access, given the pointer and \ref - /// VarType - void add_out_param(const void *ptr, VarType vtype) { - RecordVariable rv; - uint32_t slot = this->add_variable(ptr, rv); - add_out_param(slot, vtype); - } + bool test_uninit = true); + /// Helper function recording an output access, given the slot and \ref VarType + void add_out_param(uint32_t slot, VarType vtype); + /// Helper function recording an output access, given the pointer and \ref VarType + void add_out_param(const void *ptr, VarType vtype); /// Helper function recording an output access, given the pointer and the /// uint32_t representation of a \ref VarType - void add_out_param(uint32_t slot, uint32_t vtype) { - add_out_param(slot, (VarType) vtype); - } + void add_out_param(uint32_t slot, uint32_t vtype); }; - -void jitc_freeze_start(JitBackend backend, const uint32_t *inputs, - uint32_t n_inputs); - -Recording *jitc_freeze_stop(JitBackend backend, const uint32_t *outputs, - uint32_t n_outputs); - -void jitc_freeze_abort(JitBackend backend); - -void jitc_freeze_destroy(Recording *recording); - -bool jitc_freeze_pause(JitBackend backend); - -bool jitc_freeze_resume(JitBackend backend); - -void jitc_freeze_replay(Recording *recording, const uint32_t *inputs, - uint32_t *outputs); - -int jitc_freeze_dry_run(Recording *recording, const uint32_t *inputs, - uint32_t *outputs); diff --git a/tests/record.cpp b/tests/record.cpp index 1f191afd..e75d4c6d 100644 --- a/tests/record.cpp +++ b/tests/record.cpp @@ -11,7 +11,7 @@ 
TEST_BOTH(01_basic_replay) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 r0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); @@ -27,11 +27,11 @@ TEST_BOTH(01_basic_replay) { recording = jit_freeze_stop(Backend, outputs, 1); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 r0(2, 3, 4, 5, 6, 7, 8, 9, 10, 11); @@ -41,7 +41,7 @@ TEST_BOTH(01_basic_replay) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } @@ -54,7 +54,7 @@ TEST_BOTH(01_basic_replay) { TEST_BOTH(02_MIMO) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 i1(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); @@ -81,13 +81,13 @@ TEST_BOTH(02_MIMO) { recording = jit_freeze_stop(Backend, outputs, 2); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 i1(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); @@ -102,8 +102,8 @@ TEST_BOTH(02_MIMO) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - 
jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } @@ -119,7 +119,7 @@ TEST_BOTH(02_MIMO) { TEST_BOTH(03_deduplicating_input) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 r0(0, 2, 4, 6, 8, 10, 12, 14, 16, 18); @@ -145,13 +145,13 @@ TEST_BOTH(03_deduplicating_input) { recording = jit_freeze_stop(Backend, outputs, 2); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 r0(2, 4, 6, 8, 10, 12, 14, 16, 18, 20); @@ -165,8 +165,8 @@ TEST_BOTH(03_deduplicating_input) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } @@ -183,7 +183,7 @@ TEST_BOTH(04_deduplicating_output) { Recording *recording; jit_set_log_level_stderr(LogLevel::Debug); - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 i1(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); @@ 
-210,13 +210,13 @@ TEST_BOTH(04_deduplicating_output) { recording = jit_freeze_stop(Backend, outputs, 2); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 i1(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); @@ -231,8 +231,8 @@ TEST_BOTH(04_deduplicating_output) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } @@ -241,14 +241,14 @@ TEST_BOTH(04_deduplicating_output) { } /** - * This tests, weather it is possible to record multiple kernels in sequence. + * This tests whether it is possible to record multiple kernels in sequence. * The input of the second kernel relies on the execution of the first. * On LLVM, the correctness of barrier operations is therefore tested.
*/ TEST_BOTH(05_sequential_kernels) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 r0(2, 3, 4, 5, 6, 7, 8, 9, 10, 11); @@ -272,11 +272,11 @@ TEST_BOTH(05_sequential_kernels) { recording = jit_freeze_stop(Backend, outputs, 1); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 r0(2, 3, 4, 5, 6, 7, 8, 9, 10, 11); @@ -288,7 +288,7 @@ TEST_BOTH(05_sequential_kernels) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } @@ -296,7 +296,7 @@ TEST_BOTH(05_sequential_kernels) { } /** - * This tests, weather it is possible to record multiple independent kernels in + * This tests whether it is possible to record multiple independent kernels in * the same recording. * The variables of the kernels are of different size, therefore two kernels are * generated.
At replay these can be executed in parallel (LLVM) or sequence @@ -305,7 +305,7 @@ TEST_BOTH(05_sequential_kernels) { TEST_BOTH(06_parallel_kernels) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 i1(0, 1, 2, 3, 4, 5); @@ -332,13 +332,13 @@ TEST_BOTH(06_parallel_kernels) { recording = jit_freeze_stop(Backend, outputs, 2); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 i1(1, 2, 3, 4, 5, 6); @@ -353,8 +353,8 @@ TEST_BOTH(06_parallel_kernels) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(r1.index(), outputs[1]))); } @@ -369,7 +369,7 @@ TEST_BOTH(06_parallel_kernels) { TEST_BOTH(07_reduce_hsum) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 r0 = opaque(55, 1); @@ -392,11 +392,11 @@ TEST_BOTH(07_reduce_hsum) { recording = jit_freeze_stop(Backend, outputs, 1); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } - 
jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 r0 = opaque(65, 1); @@ -408,7 +408,7 @@ TEST_BOTH(07_reduce_hsum) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } @@ -421,7 +421,7 @@ TEST_BOTH(07_reduce_hsum) { TEST_BOTH(08_prefix_sum) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 r0(0, 1, 3, 6, 10, 15, 21, 28, 36, 45); @@ -443,11 +443,11 @@ TEST_BOTH(08_prefix_sum) { recording = jit_freeze_stop(Backend, outputs, 1); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 r0(1, 3, 6, 10, 15, 21, 28, 36, 45, 55); @@ -459,7 +459,7 @@ TEST_BOTH(08_prefix_sum) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } @@ -475,7 +475,7 @@ TEST_BOTH(08_prefix_sum) { TEST_BOTH(9_resized_input) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2); UInt32 r0(1, 2, 3); @@ -491,11 +491,11 @@ TEST_BOTH(9_resized_input) { recording = jit_freeze_stop(Backend, outputs, 1); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } - jit_log(LogLevel::Info, "Replay:"); + 
jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4); UInt32 r0(2, 3, 4, 5); @@ -505,7 +505,7 @@ TEST_BOTH(9_resized_input) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } @@ -519,7 +519,7 @@ TEST_BOTH(9_resized_input) { TEST_BOTH(10_input_passthrough) { Recording *recording; - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); { UInt32 i0(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); UInt32 r0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); @@ -535,13 +535,13 @@ TEST_BOTH(10_input_passthrough) { recording = jit_freeze_stop(Backend, outputs, 2); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(i0.index(), outputs[1]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { UInt32 i0(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); UInt32 r0(2, 3, 4, 5, 6, 7, 8, 9, 10, 11); @@ -551,8 +551,8 @@ TEST_BOTH(10_input_passthrough) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "o1: %s", jit_var_str(outputs[1])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o1: %s", jit_var_str(outputs[1])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); jit_assert(jit_var_all(jit_var_eq(i0.index(), outputs[1]))); } diff --git a/tests/vcall.cpp b/tests/vcall.cpp index 7240661c..ecdf451f 100644 --- a/tests/vcall.cpp +++ b/tests/vcall.cpp @@ -1257,8 +1257,6 @@ TEST_BOTH(14_kernel_record) { jit_set_flag(JitFlag::VCallOptimize, true); 
jit_set_flag(JitFlag::SymbolicCalls, true); - Recording *recording; - struct Base { virtual UInt32 f(UInt32 x) = 0; }; @@ -1294,12 +1292,14 @@ TEST_BOTH(14_kernel_record) { outputs[0] = o0.index(); }; + + Recording *recording; { BasePtr self = arange(10) % 3; UInt32 i0 = arange(10); UInt32 r0(0, 2, 4, 0, 5, 7, 0, 8, 10, 0); - jit_log(LogLevel::Info, "Recording:"); + jit_log(LogLevel::Debug, "Recording:"); // This function does not perform any validation on the input. // The input should however be evaluated before starting freezing. @@ -1320,9 +1320,9 @@ TEST_BOTH(14_kernel_record) { Mask mask = Mask::steal(jit_var_bool(Backend, true)); - jit_log(LogLevel::Info, "self: %u", self.index()); - jit_log(LogLevel::Info, "mask: %u", mask.index()); - jit_log(LogLevel::Info, "i0: %u", i0.index()); + jit_log(LogLevel::Debug, "self: %u", self.index()); + jit_log(LogLevel::Debug, "mask: %u", mask.index()); + jit_log(LogLevel::Debug, "i0: %u", i0.index()); symbolic_call( Backend, domain, false, self.index(), mask.index(), f_call, vcall_inputs, vcall_outputs); @@ -1330,20 +1330,20 @@ TEST_BOTH(14_kernel_record) { o0 = UInt32::borrow(vcall_outputs[0]); o0.eval(); - jit_log(LogLevel::Info, "o0: %u", o0.index()); + jit_log(LogLevel::Debug, "o0: %u", o0.index()); outputs[0] = o0.index(); } recording = jit_freeze_stop(Backend, outputs, 1); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); - jit_log(LogLevel::Info, "r0: %s", jit_var_str(r0.index())); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "r0: %s", jit_var_str(r0.index())); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); } - jit_log(LogLevel::Info, "Replay:"); + jit_log(LogLevel::Debug, "Replay:"); { /** * Between recording and replay, the registry has to stay the same i.e. 
@@ -1366,7 +1366,7 @@ TEST_BOTH(14_kernel_record) { jit_freeze_replay(recording, inputs, outputs); - jit_log(LogLevel::Info, "o0: %s", jit_var_str(outputs[0])); + jit_log(LogLevel::Debug, "o0: %s", jit_var_str(outputs[0])); jit_assert(jit_var_all(jit_var_eq(r0.index(), outputs[0]))); }