Skip to content

Commit

Permalink
Add Logging for Empty Traces (#968)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #968

Recently, we have had users seen empty traces when the system is idle leading to confusion as to whether it was caused by a bug in kineto formatting or not. This diff adds further logging to ensure that the user is aware that kineto finds no valid trace events and is expecting an empty trace.

Reviewed By: aaronenyeshi

Differential Revision: D60311331

fbshipit-source-id: 1836d37c622a85f17c08d6337480cb5bc49eca70
  • Loading branch information
sraikund16 authored and facebook-github-bot committed Aug 1, 2024
1 parent ac37db4 commit da2f268
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
15 changes: 15 additions & 0 deletions libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
VLOG(0) << "Profile time range: " << captureWindowStartTime_ << " - "
<< captureWindowEndTime_;
logger.handleTraceStart(metadata_);
setCpuActivityPresent(false);
setGpuActivityPresent(false);
for (auto& cpu_trace : traceBuffers_->cpu) {
string trace_name = cpu_trace->span.name;
VLOG(0) << "Processing CPU buffer for " << trace_name << " ("
Expand Down Expand Up @@ -314,6 +316,9 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
LOGGER_OBSERVER_ADD_METADATA(
"ResourceOverhead", std::to_string(resourceOverheadCount_));
}
if (!gpuActivityPresent()){
LOG(WARNING) << "GPU trace is empty!";
}
}
#endif // HAS_CUPTI
#ifdef HAS_ROCTRACER
Expand All @@ -326,6 +331,9 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
LOGGER_OBSERVER_ADD_EVENT_COUNT(count);
}
#endif // HAS_ROCTRACER
if (!traceNonEmpty()) {
LOG(WARNING) << "No Valid Trace Events (CPU/GPU) found. Outputting empty trace.";
}

for (const auto& session : sessions_) {
LOG(INFO) << "Processing child profiler trace";
Expand Down Expand Up @@ -359,6 +367,7 @@ void CuptiActivityProfiler::processCpuTrace(
LOG(WARNING) << "CPU trace is empty!";
return;
}
setCpuActivityPresent(true);

CpuGpuSpanPair& span_pair =
recordTraceSpan(cpuTrace.span, cpuTrace.gpuOpCount);
Expand Down Expand Up @@ -518,6 +527,7 @@ void CuptiActivityProfiler::handleRuntimeActivity(
return;
}
runtime_activity.log(*logger);
setGpuActivityPresent(true);
}

void CuptiActivityProfiler::handleDriverActivity(
Expand Down Expand Up @@ -545,6 +555,7 @@ void CuptiActivityProfiler::handleDriverActivity(
return;
}
runtime_activity.log(*logger);
setGpuActivityPresent(true);
}

void CuptiActivityProfiler::handleOverheadActivity(
Expand All @@ -563,6 +574,7 @@ void CuptiActivityProfiler::handleOverheadActivity(
return;
}
overhead_activity.log(*logger);
setGpuActivityPresent(true);
}


Expand Down Expand Up @@ -636,6 +648,7 @@ void CuptiActivityProfiler::handleCudaSyncActivity(
<< " stream = " << activity->streamId
<< " sync type = " << syncTypeString(activity->type);
cuda_sync_activity.log(*logger);
setGpuActivityPresent(true);
};

if (isWaitEventSync(activity->type)) {
Expand Down Expand Up @@ -743,6 +756,7 @@ inline void CuptiActivityProfiler::handleGpuActivity(
seenDeviceStreams_.insert({act.deviceId(), act.resourceId()});

act.log(*logger);
setGpuActivityPresent(true);
updateGpuNetSpan(act);
if (derivedConfig_->profileActivityTypes().count(
ActivityType::GPU_USER_ANNOTATION)) {
Expand Down Expand Up @@ -854,6 +868,7 @@ void CuptiActivityProfiler::handleRuntimeActivity(
return;
}
runtime_activity.log(*logger);
setGpuActivityPresent(true);
}

inline void CuptiActivityProfiler::handleGpuActivity(
Expand Down
18 changes: 18 additions & 0 deletions libkineto/src/CuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,22 @@ class CuptiActivityProfiler {
logger_ = logger;
}

inline void setCpuActivityPresent(bool val){
cpuActivityPresent_ = val;
}

inline void setGpuActivityPresent(bool val){
gpuActivityPresent_ = val;
}

inline bool gpuActivityPresent(){
return gpuActivityPresent_;
}

inline bool traceNonEmpty(){
return cpuActivityPresent_ || gpuActivityPresent_;
}

// Synchronous control API
void startTrace(
const std::chrono::time_point<std::chrono::system_clock>& now) {
Expand Down Expand Up @@ -443,6 +459,8 @@ class CuptiActivityProfiler {
profilerOverhead setupOverhead_;

bool cpuOnly_{false};
bool cpuActivityPresent_{false};
bool gpuActivityPresent_{false};

// ***************************************************************************
// Below state is shared with external threads.
Expand Down

0 comments on commit da2f268

Please sign in to comment.