Skip to content

Commit

Permalink
Add metric for when the file on disk is not the file being evaluated (g…
Browse files Browse the repository at this point in the history
…oogle#1348)

* Add metrics for stat change detection

* Fix test related issues due to partially constructed messages

* lint

* Convert errno to enum class StatResult

* Cleanup from PR feedback
  • Loading branch information
mlw authored May 16, 2024
1 parent 67883c5 commit 9b184ed
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 38 deletions.
1 change: 1 addition & 0 deletions Source/santad/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,7 @@ objc_library(
hdrs = ["EventProviders/EndpointSecurity/Message.h"],
deps = [
":EndpointSecurityClient",
":Metrics",
":WatchItemPolicy",
"//Source/santad/ProcessTree:process_tree",
],
Expand Down
13 changes: 13 additions & 0 deletions Source/santad/EventProviders/EndpointSecurity/Message.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <memory>
#include <string>

#include "Source/santad/Metrics.h"
#include "Source/santad/ProcessTree/process_tree.h"

namespace santa::santad::event_providers::endpoint_security {
Expand Down Expand Up @@ -53,12 +54,24 @@ class Message {

std::string ParentProcessName() const;

// For AUTH EXEC messages, re-stats the target executable's path and compares
// the device/inode against the stat info carried in the ES message. If the
// stat call fails or the values differ, `step` is recorded as the point at
// which the change was detected, along with the reason. Does nothing for
// other event types or once a change has already been recorded.
void UpdateStatState(santa::santad::StatChangeStep step) const;

// Step at which a stat change was recorded, or kNoChange if none was.
santa::santad::StatChangeStep StatChangeStep() const { return stat_change_step_; }
inline StatResult StatError() const { return stat_result_; }

private:
std::shared_ptr<EndpointSecurityAPI> esapi_;
const es_message_t* es_msg_;
std::optional<process_tree::ProcessToken> process_token_;

std::string GetProcessName(pid_t pid) const;

// Stat change tracking state. These are `mutable` because they are written
// by UpdateStatState(), which is a const member function.
mutable santa::santad::StatChangeStep stat_change_step_ =
santa::santad::StatChangeStep::kNoChange;
mutable santa::santad::StatResult stat_result_ =
santa::santad::StatResult::kOK;
};

} // namespace santa::santad::event_providers::endpoint_security
Expand Down
26 changes: 26 additions & 0 deletions Source/santad/EventProviders/EndpointSecurity/Message.mm
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <bsm/libbsm.h>
#include <libproc.h>
#include <sys/stat.h>

#include "Source/santad/EventProviders/EndpointSecurity/EndpointSecurityAPI.h"

Expand All @@ -24,6 +25,7 @@
// Wraps `es_msg`, retaining it through the ES API wrapper, and runs the
// initial stat change check tagged with the kMessageCreate step.
Message::Message(std::shared_ptr<EndpointSecurityAPI> esapi, const es_message_t *es_msg)
    : esapi_(std::move(esapi)),
      es_msg_(es_msg),
      process_token_(std::nullopt) {
  esapi_->RetainMessage(es_msg_);
  UpdateStatState(santa::santad::StatChangeStep::kMessageCreate);
}

Message::~Message() {
Expand All @@ -38,13 +40,37 @@
other.es_msg_ = nullptr;
process_token_ = std::move(other.process_token_);
other.process_token_ = std::nullopt;
stat_change_step_ = other.stat_change_step_;
stat_result_ = other.stat_result_;
}

// Copy constructor: shares the underlying ES message with `other`, taking an
// additional retain on it, and carries over the process token and any stat
// change state that was already recorded.
Message::Message(const Message &other)
    : esapi_(other.esapi_),
      es_msg_(other.es_msg_),
      process_token_(other.process_token_),
      stat_change_step_(other.stat_change_step_),
      stat_result_(other.stat_result_) {
  esapi_->RetainMessage(es_msg_);
}

// Checks whether the file currently at the exec target's path is still the
// file described by the ES message, and records `step` plus the reason when
// it is not.
void Message::UpdateStatState(santa::santad::StatChangeStep step) const {
  // Only AUTH EXEC events are tracked.
  if (es_msg_->event_type != ES_EVENT_TYPE_AUTH_EXEC) {
    return;
  }

  // Keep the first detected change; later steps never overwrite it.
  if (stat_change_step_ != santa::santad::StatChangeStep::kNoChange) {
    return;
  }

  // Note: The following checks are required due to tests that only
  // partially construct an es_message_t.
  const auto *target = es_msg_->event.exec.target;
  if (!target || !target->executable) {
    return;
  }
  const auto *executable = target->executable;

  struct stat on_disk;
  if (stat(executable->path.data, &on_disk) != 0) {
    // The path could not be stat'd at all.
    stat_change_step_ = step;
    stat_result_ = santa::santad::StatResult::kStatError;
    return;
  }

  // stat succeeded; compare identity (inode first, then device) against the
  // stat info captured in the ES message.
  const struct stat &from_es = executable->stat;
  if (from_es.st_ino != on_disk.st_ino || from_es.st_dev != on_disk.st_dev) {
    stat_change_step_ = step;
    stat_result_ = santa::santad::StatResult::kDevnoInodeMismatch;
  }
}

void Message::SetProcessToken(process_tree::ProcessToken tok) {
Expand Down
9 changes: 6 additions & 3 deletions Source/santad/EventProviders/SNTEndpointSecurityClient.mm
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ - (void)establishClientOrDie {
if ([self handleContextMessage:esMsg]) {
int64_t processingEnd = clock_gettime_nsec_np(CLOCK_MONOTONIC);
self->_metrics->SetEventMetrics(self->_processor, eventType, EventDisposition::kProcessed,
processingEnd - processingStart);
processingEnd - processingStart, esMsg.StatChangeStep(),
esMsg.StatError());
return;
}

Expand All @@ -160,12 +161,14 @@ - (void)establishClientOrDie {
recordEventMetrics:^(EventDisposition disposition) {
int64_t processingEnd = clock_gettime_nsec_np(CLOCK_MONOTONIC);
self->_metrics->SetEventMetrics(self->_processor, eventType, disposition,
processingEnd - processingStart);
processingEnd - processingStart, esMsg.StatChangeStep(),
esMsg.StatError());
}];
} else {
int64_t processingEnd = clock_gettime_nsec_np(CLOCK_MONOTONIC);
self->_metrics->SetEventMetrics(self->_processor, eventType, EventDisposition::kDropped,
processingEnd - processingStart);
processingEnd - processingStart, esMsg.StatChangeStep(),
esMsg.StatError());
}
});

Expand Down
22 changes: 19 additions & 3 deletions Source/santad/Metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,22 @@ enum class FileAccessMetricStatus {
kBlockedUser,
};

// Identifies the step at which a change to the stat info of the binary being
// evaluated was detected.
enum class StatChangeStep {
  // No change has been detected.
  kNoChange = 0,
  // Change detected when the Message wrapper was constructed.
  kMessageCreate = 1,
  // NOTE(review): presumably a change detected around code signature
  // validation; the call site is not visible here - confirm.
  kCodesignValidation = 2,
};

// Describes why a stat change was recorded.
enum class StatResult {
  // No problem recorded.
  kOK = 0,
  // The stat call on the executable's path failed.
  kStatError = 1,
  // The stat call succeeded but the device number or inode differs from the
  // values in the ES message.
  kDevnoInodeMismatch = 2,
};

using EventCountTuple = std::tuple<Processor, es_event_type_t, EventDisposition>;
using EventTimesTuple = std::tuple<Processor, es_event_type_t>;
using EventStatsTuple = std::tuple<Processor, es_event_type_t>;
// Key for the stat change counters: (step where detected, reason).
using EventStatChangeTuple = std::tuple<StatChangeStep, StatResult>;
using FileAccessMetricsPolicyVersion = std::string;
using FileAccessMetricsPolicyName = std::string;
using FileAccessEventCountTuple =
Expand All @@ -67,8 +80,8 @@ class Metrics : public std::enable_shared_from_this<Metrics> {
// Constructs a Metrics object from pre-created metric objects.
//
// Note: The four counters after `event_counts` all share the type
// SNTMetricCounter*, so the compiler cannot detect arguments passed in the
// wrong order. The parameter names below match the constructor definition:
// rate limit counts, then file access (FAA) event counts, then drop counts,
// then stat change counts.
Metrics(dispatch_queue_t q, dispatch_source_t timer_source, uint64_t interval,
        SNTMetricInt64Gauge *event_processing_times, SNTMetricCounter *event_counts,
        SNTMetricCounter *rate_limit_counts, SNTMetricCounter *faa_event_counts,
        SNTMetricCounter *drop_counts, SNTMetricCounter *stat_change_counts,
        SNTMetricSet *metric_set, void (^run_on_first_start)(Metrics *));

~Metrics();

Expand All @@ -84,7 +97,8 @@ class Metrics : public std::enable_shared_from_this<Metrics> {
void UpdateEventStats(Processor processor, const es_message_t *msg);

// Records metrics for one handled event:
//   - increments the count for (processor, event_type, disposition)
//   - stores `nanos` as the processing time for (processor, event_type)
//   - increments the count for (step, stat_result)
void SetEventMetrics(Processor processor, es_event_type_t event_type,
EventDisposition disposition, int64_t nanos, StatChangeStep step,
StatResult stat_result);

void SetRateLimitingMetrics(Processor processor, int64_t events_rate_limited_count);

Expand Down Expand Up @@ -112,6 +126,7 @@ class Metrics : public std::enable_shared_from_this<Metrics> {
SNTMetricCounter *rate_limit_counts_;
SNTMetricCounter *faa_event_counts_;
SNTMetricCounter *drop_counts_;
SNTMetricCounter *stat_change_counts_;
SNTMetricSet *metric_set_;
// Tracks whether or not the timer_source should be running.
// This helps manage dispatch source state to ensure the source is not
Expand All @@ -129,6 +144,7 @@ class Metrics : public std::enable_shared_from_this<Metrics> {
std::map<Processor, int64_t> rate_limit_counts_cache_;
std::map<FileAccessEventCountTuple, int64_t> faa_event_counts_cache_;
std::map<EventStatsTuple, SequenceStats> drop_cache_;
std::map<EventStatChangeTuple, int64_t> stat_change_cache_;
};

} // namespace santa::santad
Expand Down
58 changes: 54 additions & 4 deletions Source/santad/Metrics.mm
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@
static NSString *const kEventDispositionDropped = @"Dropped";
static NSString *const kEventDispositionProcessed = @"Processed";

// Field values reported for the "step" field of the stat change metric.
static NSString *const kStatChangeStepNoChange = @"NoChange";
static NSString *const kStatChangeStepMessageCreate = @"MessageCreate";
static NSString *const kStatChangeStepCodesignValidation = @"CodesignValidation";

// Field values reported for the "error" field of the stat change metric.
static NSString *const kStatResultOK = @"OK";
static NSString *const kStatResultStatError = @"StatError";
static NSString *const kStatResultDevnoInodeMismatch = @"DevnoInodeMismatch";

// Compat values
static NSString *const kFileAccessMetricStatusOK = @"OK";
static NSString *const kFileAccessMetricStatusBlockedUser = @"BLOCKED_USER";
Expand Down Expand Up @@ -148,6 +156,30 @@
}
}

// Maps a StatChangeStep to the string used as its metric field value.
// Raises an NSException if `step` is not one of the known enumerators.
NSString *const StatChangeStepToString(StatChangeStep step) {
  switch (step) {
    case StatChangeStep::kNoChange: return kStatChangeStepNoChange;
    case StatChangeStep::kMessageCreate: return kStatChangeStepMessageCreate;
    case StatChangeStep::kCodesignValidation: return kStatChangeStepCodesignValidation;
  }

  // Only reachable when `step` holds a value outside the enumerators above.
  const int rawStep = static_cast<int>(step);
  [NSException raise:@"Invalid stat change step"
              format:@"Unknown stat change step value: %d", rawStep];
  return nil;
}

// Maps a StatResult to the string used as its metric field value.
// Raises an NSException if `result` is not one of the known enumerators.
NSString *const StatResultToString(StatResult result) {
  switch (result) {
    case StatResult::kOK: return kStatResultOK;
    case StatResult::kStatError: return kStatResultStatError;
    case StatResult::kDevnoInodeMismatch: return kStatResultDevnoInodeMismatch;
  }

  // Only reachable when `result` holds a value outside the enumerators above.
  const int rawResult = static_cast<int>(result);
  [NSException raise:@"Invalid stat result"
              format:@"Unknown stat result value: %d", rawResult];
  return nil;
}

std::shared_ptr<Metrics> Metrics::Create(SNTMetricSet *metric_set, uint64_t interval) {
dispatch_queue_t q = dispatch_queue_create("com.google.santa.santametricsservice.q",
DISPATCH_QUEUE_SERIAL_WITH_AUTORELEASE_POOL);
Expand Down Expand Up @@ -181,9 +213,14 @@
fieldNames:@[ @"Processor", @"Event" ]
helpText:@"Count of the number of drops for each event"];

// Counter keyed by the step at which a stat change was detected ("step") and
// the reason it was recorded ("error").
SNTMetricCounter *stat_change_counts =
  [metric_set counterWithName:@"/santa/event_stat_change_count"
                   fieldNames:@[ @"step", @"error" ]
                     helpText:@"Count of times a stat info changed for a binary being evaluated"];

// Note: The counter arguments all have the same type, so their order must
// match the Metrics constructor definition (rate limit, FAA events, drops,
// stat changes); a swap would compile without any diagnostic.
// The block's `metrics` parameter shadows the local being initialized here.
std::shared_ptr<Metrics> metrics = std::make_shared<Metrics>(
q, timer_source, interval, event_processing_times, event_counts, rate_limit_counts,
faa_event_counts, drop_counts, stat_change_counts, metric_set, ^(Metrics *metrics) {
SNTRegisterCoreMetrics();
metrics->EstablishConnection();
});
Expand All @@ -204,8 +241,8 @@
Metrics::Metrics(dispatch_queue_t q, dispatch_source_t timer_source, uint64_t interval,
SNTMetricInt64Gauge *event_processing_times, SNTMetricCounter *event_counts,
SNTMetricCounter *rate_limit_counts, SNTMetricCounter *faa_event_counts,
SNTMetricCounter *drop_counts, SNTMetricSet *metric_set,
void (^run_on_first_start)(Metrics *))
SNTMetricCounter *drop_counts, SNTMetricCounter *stat_change_counts,
SNTMetricSet *metric_set, void (^run_on_first_start)(Metrics *))
: q_(q),
timer_source_(timer_source),
interval_(interval),
Expand All @@ -214,6 +251,7 @@
rate_limit_counts_(rate_limit_counts),
faa_event_counts_(faa_event_counts),
drop_counts_(drop_counts),
stat_change_counts_(stat_change_counts),
metric_set_(metric_set),
run_on_first_start_(run_on_first_start) {
SetInterval(interval_);
Expand Down Expand Up @@ -307,13 +345,23 @@
}
}

// Export the cached stat change counts, one counter cell per
// (step, result) pair. Entries with a zero count are skipped.
for (const auto &[key, count] : stat_change_cache_) {
if (count > 0) {
NSString *stepName = StatChangeStepToString(std::get<StatChangeStep>(key));
NSString *error = StatResultToString(std::get<StatResult>(key));

[stat_change_counts_ incrementBy:count forFieldValues:@[ stepName, error ]];
}
}

// Reset the maps so the next cycle begins with a clean state
// IMPORTANT: Do not reset drop_cache_, the sequence numbers must persist
// for accurate accounting
event_counts_cache_ = {};
event_times_cache_ = {};
rate_limit_counts_cache_ = {};
faa_event_counts_cache_ = {};
stat_change_cache_ = {};
});
}

Expand Down Expand Up @@ -356,10 +404,12 @@
}

// Records metrics for one handled event. All cache updates are performed
// synchronously on events_q_.
void Metrics::SetEventMetrics(Processor processor, es_event_type_t event_type,
                              EventDisposition event_disposition, int64_t nanos,
                              StatChangeStep step, StatResult stat_result) {
  // Build the cache keys up front; the block captures them by value.
  const EventCountTuple count_key{processor, event_type, event_disposition};
  const EventTimesTuple times_key{processor, event_type};
  const EventStatChangeTuple stat_change_key{step, stat_result};

  dispatch_sync(events_q_, ^{
    event_counts_cache_[count_key] += 1;
    // The processing time is overwritten, not accumulated.
    event_times_cache_[times_key] = nanos;
    stat_change_cache_[stat_change_key] += 1;
  });
}

Expand Down
Loading

0 comments on commit 9b184ed

Please sign in to comment.