diff --git a/libyara/exec.c b/libyara/exec.c index f324eb8ed4..62a1e72fb7 100644 --- a/libyara/exec.c +++ b/libyara/exec.c @@ -1438,7 +1438,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) case OP_FOUND: pop(r1); - r2.i = context->matches[r1.s->idx].tail != NULL ? 1 : 0; + r2.i = context->matches.entries[r1.s->idx].tail != NULL ? 1 : 0; YR_DEBUG_FPRINTF( 2, stderr, @@ -1460,7 +1460,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) ensure_within_rules_arena(r2.p); #endif - match = context->matches[r2.s->idx].head; + match = context->matches.entries[r2.s->idx].head; r3.i = false; while (match != NULL) @@ -1494,7 +1494,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) ensure_within_rules_arena(r3.p); #endif - match = context->matches[r3.s->idx].head; + match = context->matches.entries[r3.s->idx].head; r4.i = false; while (match != NULL && !r4.i) @@ -1522,7 +1522,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) ensure_within_rules_arena(r1.p); #endif - r2.i = context->matches[r1.s->idx].count; + r2.i = context->matches.entries[r1.s->idx].count; push(r2); break; @@ -1540,7 +1540,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) ensure_within_rules_arena(r3.p); #endif - match = context->matches[r3.s->idx].head; + match = context->matches.entries[r3.s->idx].head; r4.i = 0; while (match != NULL) @@ -1571,7 +1571,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) ensure_within_rules_arena(r2.p); #endif - match = context->matches[r2.s->idx].head; + match = context->matches.entries[r2.s->idx].head; i = 1; r3.i = YR_UNDEFINED; @@ -1599,7 +1599,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) ensure_within_rules_arena(r2.p); #endif - match = context->matches[r2.s->idx].head; + match = context->matches.entries[r2.s->idx].head; i = 1; r3.i = YR_UNDEFINED; @@ -1629,7 +1629,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) { if (r2.i == OF_STRING_SET) { - if (context->matches[r1.s->idx].tail != NULL) + if (context->matches.entries[r1.s->idx].tail != NULL) { found++; } @@ 
-1714,7 +1714,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) #if YR_PARANOID_EXEC ensure_within_rules_arena(r3.p); #endif - match = context->matches[r3.s->idx].head; + match = context->matches.entries[r3.s->idx].head; while (match != NULL) { @@ -1790,7 +1790,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) #if YR_PARANOID_EXEC ensure_within_rules_arena(r1.p); #endif - match = context->matches[r1.s->idx].head; + match = context->matches.entries[r1.s->idx].head; while (match != NULL) { diff --git a/libyara/include/yara/rules.h b/libyara/include/yara/rules.h index 2606e93c1e..a4177c713d 100644 --- a/libyara/include/yara/rules.h +++ b/libyara/include/yara/rules.h @@ -61,7 +61,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. string = STRING_IS_LAST_IN_RULE(string) ? NULL : string + 1) #define yr_string_matches_foreach(context, string, match) \ - for (match = context->matches[string->idx].head; match != NULL; \ + for (match = context->matches.entries[string->idx].head; match != NULL; \ match = match->next) \ /* private matches are skipped */ \ if (match->is_private) \ diff --git a/libyara/include/yara/types.h b/libyara/include/yara/types.h index 36aac97cf4..92434e88d9 100644 --- a/libyara/include/yara/types.h +++ b/libyara/include/yara/types.h @@ -172,6 +172,7 @@ typedef struct YR_AC_MATCH YR_AC_MATCH; typedef struct YR_NAMESPACE YR_NAMESPACE; typedef struct YR_META YR_META; typedef struct YR_MATCHES YR_MATCHES; +typedef struct YR_MATCHLIST YR_MATCHLIST; typedef struct YR_STRING YR_STRING; typedef struct YR_RULE YR_RULE; typedef struct YR_RULES YR_RULES; @@ -479,6 +480,28 @@ struct YR_MATCHES YR_MATCH* tail; int32_t count; + + // If true, this YR_MATCHES instance contained a YR_MATCH at some point + // of the scan and will need to be zeroed at the end of the scan. + bool dirty; +}; + +struct YR_MATCHLIST +{ + // Array with pointers to lists of matches. Item N in the array has the + // list of matches for string with index N. 
+ // Total size is equal to length. + YR_MATCHES* entries; + + // Array with indices of "dirty" matches, that is, elements in entries that + // must be cleaned before this match list can be reused. + // Total size is equal to length, but only the first dirty_count elements + // are actually valid; all further elements must be ignored. + int32_t* dirty_entries; + int32_t dirty_count; + + // Number of elements in entries and in dirty_entries + int32_t length; }; struct YR_MATCH @@ -816,9 +839,8 @@ struct YR_SCAN_CONTEXT // N has too many matches. YR_BITMASK* strings_temp_disabled; - // Array with pointers to lists of matches. Item N in the array has the - // list of matches for string with index N. - YR_MATCHES* matches; + // A match list containing the matches per string. + YR_MATCHLIST matches; // "unconfirmed_matches" is like "matches" but for strings that are part of // a chain. Let's suppose that the string S is split in two chained strings @@ -826,7 +848,7 @@ struct YR_SCAN_CONTEXT // until a match for S2 is found (within the range defined by chain_gap_min // and chain_gap_max), so the matches for S1 are put in "unconfirmed_matches" // until they can be confirmed or discarded. - YR_MATCHES* unconfirmed_matches; + YR_MATCHLIST unconfirmed_matches; // A bitmap with one bit per rule, bit N is set if the corresponding rule // must evaluated.
diff --git a/libyara/scan.c b/libyara/scan.c index 5e4c8dc308..c73d927633 100644 --- a/libyara/scan.c +++ b/libyara/scan.c @@ -270,7 +270,7 @@ static void _yr_scan_update_match_chain_length( if (string->chained_to == NULL) return; - match = context->unconfirmed_matches[string->chained_to->idx].head; + match = context->unconfirmed_matches.entries[string->chained_to->idx].head; while (match != NULL) { @@ -289,9 +289,23 @@ static void _yr_scan_update_match_chain_length( static int _yr_scan_add_match_to_list( YR_MATCH* match, - YR_MATCHES* matches_list, + YR_MATCHLIST* matchlist, + int string_index, int replace_if_exists) { + YR_MATCHES* matches_list = &matchlist->entries[string_index]; + + // Mark entry as dirty if it isn't marked as such yet + if (!matches_list->dirty) + { + matches_list->dirty = true; + // Since dirty_entries has sufficient size for all strings, and + // this one is not marked as dirty and thus not part of dirty_entries yet, + // we can safely add it to dirty_entries without exceeding its maximum size. + matchlist->dirty_entries[matchlist->dirty_count] = string_index; + matchlist->dirty_count++; + } + int result = ERROR_SUCCESS; #if YR_DEBUG_VERBOSITY > 0 @@ -369,8 +383,11 @@ _exit:; static void _yr_scan_remove_match_from_list( YR_MATCH* match, - YR_MATCHES* matches_list) + YR_MATCHLIST* matchlist, + int string_index) { + YR_MATCHES* matches_list = &matchlist->entries[string_index]; + if (match->prev != NULL) match->prev->next = match->next; @@ -386,6 +403,10 @@ static void _yr_scan_remove_match_from_list( matches_list->count--; match->next = NULL; match->prev = NULL; + + // If matches_list->count == 0 now, we could mark this matchlist entry as no longer dirty. + // However, finding and removing the entry from the dirty list takes more time than leaving + // it dirty and (unnecessarily) cleaning it later on, so we don't bother. } // @@ -449,7 +470,7 @@ static int _yr_scan_verify_chained_string_match( // list of unconfirmed matches.
Unconfirmed matches are sorted in ascending // offset order. If no unconfirmed match exists, the lowest possible offset // is the offset of the current match. - match = context->unconfirmed_matches[matching_string->idx].head; + match = context->unconfirmed_matches.entries[matching_string->idx].head; if (match != NULL) lowest_offset = match->offset; @@ -460,7 +481,7 @@ static int _yr_scan_verify_chained_string_match( // precedes the currently matching string. If we have a string chain like: // S1 <- S2 <- S3, and we just found a match for S2, we are iterating the // list of unconfirmed matches of S1. - match = context->unconfirmed_matches[matching_string->chained_to->idx].head; + match = context->unconfirmed_matches.entries[matching_string->chained_to->idx].head; while (match != NULL) { @@ -481,7 +502,8 @@ static int _yr_scan_verify_chained_string_match( // match can't be an actual match) _yr_scan_remove_match_from_list( match, - &context->unconfirmed_matches[matching_string->chained_to->idx]); + &context->unconfirmed_matches, + matching_string->chained_to->idx); } else if ( ending_offset + matching_string->chain_gap_max >= match_offset && @@ -517,7 +539,7 @@ static int _yr_scan_verify_chained_string_match( // every unconfirmed match in all the strings in the chain up to the head // of the chain. match = - context->unconfirmed_matches[matching_string->chained_to->idx].head; + context->unconfirmed_matches.entries[matching_string->chained_to->idx].head; while (match != NULL) { @@ -543,7 +565,7 @@ static int _yr_scan_verify_chained_string_match( } // "string" points now to the head of the strings chain. 
- match = context->unconfirmed_matches[string->idx].head; + match = context->unconfirmed_matches.entries[string->idx].head; // Iterate over the list of unconfirmed matches of the head of the chain, // and move to the list of confirmed matches those with a chain_length @@ -556,7 +578,9 @@ static int _yr_scan_verify_chained_string_match( if (match->chain_length == full_chain_length) { _yr_scan_remove_match_from_list( - match, &context->unconfirmed_matches[string->idx]); + match, + &context->unconfirmed_matches, + string->idx); match->match_length = (int32_t) (match_offset - match->offset + match_length); @@ -580,7 +604,7 @@ static int _yr_scan_verify_chained_string_match( yr_bitmask_set(context->required_eval, string->rule_idx); FAIL_ON_ERROR(_yr_scan_add_match_to_list( - match, &context->matches[string->idx], false)); + match, &context->matches, string->idx, false)); } match = next_match; @@ -627,7 +651,8 @@ static int _yr_scan_verify_chained_string_match( // an actual match until finding the remaining parts of the chain. 
FAIL_ON_ERROR(_yr_scan_add_match_to_list( new_match, - &context->unconfirmed_matches[matching_string->idx], + &context->unconfirmed_matches, + matching_string->idx, false)); } } @@ -758,7 +783,8 @@ static int _yr_scan_match_callback( FAIL_ON_ERROR(_yr_scan_add_match_to_list( new_match, - &callback_args->context->matches[string->idx], + &callback_args->context->matches, + string->idx, STRING_IS_GREEDY_REGEXP(string))); } } @@ -1059,7 +1085,7 @@ int yr_scan_verify_match( return ERROR_SUCCESS; if (context->flags & SCAN_FLAGS_FAST_MODE && STRING_IS_SINGLE_MATCH(string) && - context->matches[string->idx].head != NULL) + context->matches.entries[string->idx].head != NULL) return ERROR_SUCCESS; if (STRING_IS_FIXED_OFFSET(string) && diff --git a/libyara/scanner.c b/libyara/scanner.c index 3667561161..bb6785bc44 100644 --- a/libyara/scanner.c +++ b/libyara/scanner.c @@ -106,7 +106,7 @@ static int _yr_scanner_scan_mem_block( match = &rules->ac_match_pool[match_table[state] - 1]; - if (scanner->matches->count >= YR_SLOW_STRING_MATCHES) + if (scanner->matches.entries->count >= YR_SLOW_STRING_MATCHES) { report_string = match->string; rule = report_string @@ -172,8 +172,8 @@ static int _yr_scanner_scan_mem_block( } } - if (rule != NULL && scanner->matches->count >= YR_SLOW_STRING_MATCHES && - scanner->matches->count < YR_MAX_STRING_MATCHES) + if (rule != NULL && scanner->matches.entries->count >= YR_SLOW_STRING_MATCHES && + scanner->matches.entries->count < YR_MAX_STRING_MATCHES) { if (rule != NULL && report_string != NULL) { @@ -203,6 +203,57 @@ static int _yr_scanner_scan_mem_block( return result; } +// Allocates the two arrays backing a match list with room for "capacity" entries. +// On success *list is overwritten with the new, empty list; on failure *list +// is left untouched and ERROR_INSUFFICIENT_MEMORY is returned. +static int _yr_matchlist_create(YR_MATCHLIST* list, int32_t capacity) +{ + YR_DEBUG_FPRINTF(2, stderr, "- %s() {} \n", __FUNCTION__); + + YR_MATCHLIST new_list; + + new_list.dirty_entries = (int32_t*) yr_calloc(capacity, sizeof(int32_t)); + if (new_list.dirty_entries == NULL && capacity > 0) + { + return ERROR_INSUFFICIENT_MEMORY; + } + + new_list.entries = (YR_MATCHES*)
yr_calloc(capacity, sizeof(YR_MATCHES)); + if (new_list.entries == NULL && capacity > 0) + { + yr_free(new_list.dirty_entries); + return ERROR_INSUFFICIENT_MEMORY; + } + new_list.dirty_count = 0; + new_list.length = capacity; + *list = new_list; + return ERROR_SUCCESS; +} + +// Releases the arrays owned by the list and resets it to the empty state, so +// a second destroy of the same list cannot free the arrays twice. +static void _yr_matchlist_destroy(YR_MATCHLIST* list) +{ + yr_free(list->entries); + yr_free(list->dirty_entries); + list->entries = NULL; + list->dirty_entries = NULL; + list->dirty_count = 0; + list->length = 0; +} + +// Zeroes every entry recorded in dirty_entries and empties the dirty list. +// The list is taken by pointer so that resetting dirty_count reaches the caller. +static void _yr_matchlist_clear(YR_MATCHLIST* list) +{ + for (int i = 0; i < list->dirty_count; i++) + { + int32_t dirty_entry = list->dirty_entries[i]; + memset(&list->entries[dirty_entry], 0, sizeof(YR_MATCHES)); + } + list->dirty_count = 0; +} + static void _yr_scanner_clean_matches(YR_SCANNER* scanner) { YR_DEBUG_FPRINTF(2, stderr, "- %s() {} \n", __FUNCTION__); @@ -227,12 +278,8 @@ static void _yr_scanner_clean_matches(YR_SCANNER* scanner) 0, sizeof(YR_BITMASK) * YR_BITMASK_SIZE(scanner->rules->num_strings)); - memset(scanner->matches, 0, sizeof(YR_MATCHES) * scanner->rules->num_strings); - - memset( - scanner->unconfirmed_matches, - 0, - sizeof(YR_MATCHES) * scanner->rules->num_strings); + _yr_matchlist_clear(&scanner->matches); + _yr_matchlist_clear(&scanner->unconfirmed_matches); } YR_API int yr_scanner_create(YR_RULES* rules, YR_SCANNER** scanner) @@ -272,23 +319,30 @@ YR_API int yr_scanner_create(YR_RULES* rules, YR_SCANNER** scanner) new_scanner->strings_temp_disabled = (YR_BITMASK*) yr_calloc( sizeof(YR_BITMASK), YR_BITMASK_SIZE(rules->num_strings)); - new_scanner->matches = (YR_MATCHES*) yr_calloc( - rules->num_strings, sizeof(YR_MATCHES)); - - new_scanner->unconfirmed_matches = (YR_MATCHES*) yr_calloc( - rules->num_strings, sizeof(YR_MATCHES)); - if (new_scanner->rule_matches_flags == NULL || new_scanner->required_eval == NULL || new_scanner->ns_unsatisfied_flags == NULL || - new_scanner->strings_temp_disabled == NULL || - (new_scanner->matches == NULL && rules->num_strings > 0) || - (new_scanner->unconfirmed_matches == NULL && rules->num_strings > 0)) +
new_scanner->strings_temp_disabled == NULL) { yr_scanner_destroy(new_scanner); return ERROR_INSUFFICIENT_MEMORY; } + int err = _yr_matchlist_create(&new_scanner->matches, rules->num_strings); + if (err != ERROR_SUCCESS) + { + yr_scanner_destroy(new_scanner); + return err; + } + + err = _yr_matchlist_create(&new_scanner->unconfirmed_matches, rules->num_strings); + if (err != ERROR_SUCCESS) + { + // yr_scanner_destroy() also releases new_scanner->matches. + yr_scanner_destroy(new_scanner); + return err; + } + #ifdef YR_PROFILING_ENABLED new_scanner->profiling_info = yr_calloc( rules->num_rules, sizeof(YR_PROFILING_INFO)); @@ -368,8 +422,8 @@ YR_API void yr_scanner_destroy(YR_SCANNER* scanner) yr_free(scanner->ns_unsatisfied_flags); yr_free(scanner->required_eval); yr_free(scanner->strings_temp_disabled); - yr_free(scanner->matches); - yr_free(scanner->unconfirmed_matches); + _yr_matchlist_destroy(&scanner->matches); + _yr_matchlist_destroy(&scanner->unconfirmed_matches); yr_free(scanner); }