Unverified Commit 11bf6784 authored by Abhinav Anil Sharma's avatar Abhinav Anil Sharma Committed by GitHub
Browse files

i#5675 record filter: Add cache_filter and type_filter. (#5742)

Adds cache_filter that allows filtering data and/or instr trace_entry_ts in a stored offline trace by passing the addresses through a cache. Uses the existing drcachesim cache simulator to implement this.

Adds a did_last_access_hit() public API in a sub-class of cache_stats_t to get the status of the last processed access. This is used in cache_filter to determine whether the current trace_entry_t should be output or not.

Adds type_filter that allows filtering trace entries by their type, including markers by their type.

Adds the ability to stop filtering when a certain timestamp is reached. At this point, a new marker TRACE_MARKER_TYPE_FILTER_ENDPOINT is added to help tools figure out the warmup region in the trace. Adds a new get_last_timestamp() public API to memtrace_stream_t to enable this.

Adds a test that verifies cache and type filter operation.

Adds command line options to the record_filter_launcher executable to allow using the cache and type filters.

Issue: #5675
Showing with 766 additions and 79 deletions
+766 -79
......@@ -139,9 +139,7 @@ add_exported_library(drmemtrace_basic_counts STATIC tools/basic_counts.cpp)
add_exported_library(drmemtrace_opcode_mix STATIC tools/opcode_mix.cpp)
add_exported_library(drmemtrace_view STATIC tools/view.cpp)
add_exported_library(drmemtrace_func_view STATIC tools/func_view.cpp)
add_exported_library(drmemtrace_record_filter STATIC
tools/filter/record_filter.cpp
tools/filter/null_filter.h)
configure_DynamoRIO_standalone(drmemtrace_opcode_mix)
configure_DynamoRIO_standalone(drmemtrace_view)
......@@ -162,6 +160,13 @@ add_exported_library(drmemtrace_simulator STATIC
simulator/tlb_simulator.cpp
)
add_exported_library(drmemtrace_record_filter STATIC
tools/filter/record_filter.cpp
tools/filter/cache_filter.h
tools/filter/type_filter.h
tools/filter/null_filter.h)
target_link_libraries(drmemtrace_record_filter drmemtrace_simulator)
add_exported_library(directory_iterator STATIC common/directory_iterator.cpp)
add_dependencies(directory_iterator api_headers)
target_link_libraries(directory_iterator drfrontendlib)
......
......@@ -82,6 +82,12 @@ public:
virtual std::string
get_stream_name() const = 0;
/**
* Returns the value of the last seen #TRACE_MARKER_TYPE_TIMESTAMP marker.
*/
virtual uint64_t
get_last_timestamp() const = 0;
/**
* Returns the #trace_version_t value from the #TRACE_MARKER_TYPE_VERSION record
* in the trace header.
......@@ -119,4 +125,58 @@ public:
get_page_size() const = 0;
};
/**
* Implementation of memtrace_stream_t useful for mocks in tests.
*/
class test_memtrace_stream_t : public memtrace_stream_t {
public:
    virtual ~test_memtrace_stream_t() = default;

    // Every query below returns a fixed neutral value (0 or an empty string).
    // Tests derive from this class and override only the queries they need.
    uint64_t
    get_record_ordinal() const override
    {
        return 0;
    }
    uint64_t
    get_instruction_ordinal() const override
    {
        return 0;
    }
    std::string
    get_stream_name() const override
    {
        return std::string();
    }
    uint64_t
    get_last_timestamp() const override
    {
        return 0;
    }
    uint64_t
    get_version() const override
    {
        return 0;
    }
    uint64_t
    get_filetype() const override
    {
        return 0;
    }
    uint64_t
    get_cache_line_size() const override
    {
        return 0;
    }
    uint64_t
    get_chunk_instr_count() const override
    {
        return 0;
    }
    uint64_t
    get_page_size() const override
    {
        return 0;
    }
};
#endif /* _MEMTRACE_STREAM_H_ */
......@@ -418,6 +418,12 @@ typedef enum {
*/
TRACE_MARKER_TYPE_RECORD_ORDINAL,
/**
* Indicates a point in the trace where filtering ended.
* This is currently added by the record_filter tool to annotate when the
* warmup part of the trace ends.
*/
TRACE_MARKER_TYPE_FILTER_ENDPOINT,
// ...
// These values are reserved for future built-in marker types.
// ...
......
......@@ -275,9 +275,15 @@ reader_t::process_input_entry()
} else {
have_memref = true;
}
if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP)
if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_TIMESTAMP) {
last_timestamp_instr_count_ = cur_instr_count_;
else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_VERSION)
// Today, a skipped memref is just a duplicate of one that we've
// already seen, so this condition is not really needed. But to
// be future-proof, we want to avoid looking at timestamps that
// won't be passed to the user as well.
if (have_memref)
last_timestamp_ = cur_ref_.marker.marker_value;
} else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_VERSION)
version_ = cur_ref_.marker.marker_value;
else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
filetype_ = cur_ref_.marker.marker_value;
......
......@@ -146,6 +146,11 @@ public:
return cur_instr_count_;
}
// Returns the cached last_timestamp_ value; it is 0 until a timestamp has
// been recorded.
uint64_t
get_last_timestamp() const override
{
return last_timestamp_;
}
uint64_t
get_version() const override
{
return version_;
......@@ -200,6 +205,7 @@ protected:
int64_t suppress_ref_count_ = -1;
uint64_t cur_instr_count_ = 0;
uint64_t last_timestamp_instr_count_ = 0;
uint64_t last_timestamp_ = 0;
trace_entry_t *input_entry_ = nullptr;
// Remember top-level headers for the memtrace_stream_t interface.
uint64_t version_ = 0;
......
......@@ -145,7 +145,7 @@ public:
++cur_ref_count_;
if (type_is_instr(static_cast<trace_type_t>(cur_entry_.type)))
++cur_instr_count_;
if (cur_entry_.type == TRACE_TYPE_MARKER) {
else if (cur_entry_.type == TRACE_TYPE_MARKER) {
switch (cur_entry_.size) {
case TRACE_MARKER_TYPE_VERSION: version_ = cur_entry_.addr; break;
case TRACE_MARKER_TYPE_FILETYPE: filetype_ = cur_entry_.addr; break;
......@@ -156,6 +156,9 @@ public:
case TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT:
chunk_instr_count_ = cur_entry_.addr;
break;
case TRACE_MARKER_TYPE_TIMESTAMP:
last_timestamp_ = cur_entry_.addr;
break;
}
}
}
......@@ -173,6 +176,11 @@ public:
return cur_instr_count_;
}
// Returns the cached last_timestamp_ value; it is 0 until a
// TRACE_MARKER_TYPE_TIMESTAMP entry has been read.
uint64_t
get_last_timestamp() const override
{
return last_timestamp_;
}
uint64_t
get_version() const override
{
return version_;
......@@ -237,6 +245,8 @@ protected:
private:
uint64_t cur_ref_count_ = 0;
uint64_t cur_instr_count_ = 0;
uint64_t last_timestamp_ = 0;
// Remember top-level headers for the memtrace_stream_t interface.
uint64_t version_ = 0;
uint64_t filetype_ = 0;
......
/* **********************************************************
* Copyright (c) 2015-2020 Google, Inc. All rights reserved.
* Copyright (c) 2015-2022 Google, Inc. All rights reserved.
* **********************************************************/
/*
......@@ -46,7 +46,7 @@ public:
// In addition to caching_device_stats_t::access,
// cache_stats_t::access processes prefetching requests.
void
virtual void
access(const memref_t &memref, bool hit,
caching_device_block_t *cache_block) override;
......
......@@ -37,8 +37,12 @@
#include "droption.h"
#include "tools/basic_counts.h"
#include "tools/filter/null_filter.h"
#include "tools/filter/cache_filter.h"
#include "tools/filter/record_filter.h"
#include "tools/filter/type_filter.h"
#include <inttypes.h>
#include <fstream>
#include <vector>
#define FATAL_ERROR(msg, ...) \
......@@ -66,6 +70,55 @@ static droption_t<std::string> op_tmp_output_dir(
"[Required] Output directory for the filtered trace",
"Specifies the directory where the filtered trace will be written.");
class test_record_filter_t : public dynamorio::drmemtrace::record_filter_t {
public:
test_record_filter_t(const std::vector<record_filter_func_t *> &filters,
uint64_t last_timestamp)
: record_filter_t("", filters, last_timestamp,
/*verbose=*/0)
{
}
std::vector<trace_entry_t>
get_output_entries()
{
return output;
}
protected:
bool
write_trace_entry(dynamorio::drmemtrace::record_filter_t::per_shard_t *shard,
const trace_entry_t &entry) override
{
output.push_back(entry);
return true;
}
std::unique_ptr<std::ostream>
get_writer(per_shard_t *per_shard, memtrace_stream_t *shard_stream) override
{
return std::unique_ptr<std::ostream>(new std::ofstream("/dev/null"));
}
private:
std::vector<trace_entry_t> output;
};
// Mock stream whose last-timestamp value is driven by the test, emulating what
// a real reader would report as it walks the trace.
class local_stream_t : public test_memtrace_stream_t {
public:
    // Returns the value most recently passed to set_last_timestamp(), or 0 if
    // it has not been called yet.
    uint64_t
    get_last_timestamp() const override
    {
        return last_timestamp_;
    }
    void
    set_last_timestamp(uint64_t last_timestamp)
    {
        last_timestamp_ = last_timestamp;
    }

private:
    // Must start at a defined value: the record filter queries the timestamp
    // for every entry, including those that precede the first timestamp marker.
    uint64_t last_timestamp_ = 0;
};
static bool
local_create_dir(const char *dir)
{
......@@ -80,7 +133,7 @@ get_basic_counts(const std::string &trace_dir)
analysis_tool_t *basic_counts_tool = new basic_counts_t(/*verbose=*/0);
std::vector<analysis_tool_t *> tools;
tools.push_back(basic_counts_tool);
analyzer_t analyzer(trace_dir, &tools[0], (int)tools.size());
analyzer_t analyzer(trace_dir, &tools[0], static_cast<int>(tools.size()));
if (!analyzer) {
FATAL_ERROR("failed to initialize analyzer: %s",
analyzer.get_error_string().c_str());
......@@ -94,7 +147,177 @@ get_basic_counts(const std::string &trace_dir)
return counts;
}
bool
// Prints one trace entry to stderr as "<type name>:<size>:<addr in hex>",
// without a trailing newline.
static void
print_entry(trace_entry_t entry)
{
    const char *type_name = trace_type_names[entry.type];
    fprintf(stderr, "%s:%d:%" PRIxPTR, type_name, entry.size, entry.addr);
}
static bool
test_cache_and_type_filter()
{
struct expected_output {
trace_entry_t entry;
bool present[2];
};
// We test two configurations:
// 1. filter data address stream using a cache, and filter function markers
// and encoding entries.
// 2. filter data and instruction address stream using a cache.
//
std::vector<struct expected_output> entries = {
// Trace shard header.
{ { TRACE_TYPE_HEADER, 0, { 0x1 } }, { true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, { true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE, { 0x3 } }, { true, true } },
{ { TRACE_TYPE_THREAD, 0, { 0x4 } }, { true, true } },
{ { TRACE_TYPE_PID, 0, { 0x5 } }, { true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } },
{ true, true } },
// Unit header.
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, { true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, { true, true } },
{ { TRACE_TYPE_INSTR, 4, { 0xaa00 } }, { true, true } },
{ { TRACE_TYPE_WRITE, 4, { 0xaa80 } }, { true, true } },
// Unit header.
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x9 } }, { false, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xa } }, { false, true } },
{ { TRACE_TYPE_WRITE, 4, { 0xaa90 } }, { false, false } },
{ { TRACE_TYPE_ENCODING, 0, { 0 } }, { false, true } },
// Unit header.
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xb } }, { true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xc } }, { true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { 0xd } }, { false, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0xe } }, { false, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETADDR, { 0xf } },
{ false, true } },
{ { TRACE_TYPE_INSTR, 4, { 0xaa80 } }, { true, false } },
{ { TRACE_TYPE_READ, 4, { 0xaaa0 } }, { false, false } },
// The following entry is part of the expected output, but not the input. We
// will skip it in the parallel_shard_filter() loop below.
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILTER_ENDPOINT, { 0 } },
{ true, true } },
// Unit header.
// Since this timestamp is greater than the last_timestamp set below, all
// later entries will be output regardless of the configured filter.
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xabcdef } },
{ true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xa0 } }, { true, true } },
{ { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { 0xa1 } }, { true, true } },
// Trace shard footer.
{ { TRACE_TYPE_FOOTER, 0, { 0xa2 } }, { true, true } }
};
for (int k = 0; k < 2; ++k) {
auto stream = std::unique_ptr<local_stream_t>(new local_stream_t());
// Construct record_filter_func_ts.
std::vector<dynamorio::drmemtrace::record_filter_t::record_filter_func_t *>
filters;
auto cache_filter =
std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>(
new dynamorio::drmemtrace::cache_filter_t(
/*cache_associativity=*/1, /*cache_line_size=*/64, /*cache_size=*/128,
/*filter_data=*/true, /*filter_instrs=*/k == 1));
if (cache_filter->get_error_string() != "") {
fprintf(stderr, "Couldn't construct a cache_filter %s",
cache_filter->get_error_string().c_str());
return false;
}
filters.push_back(cache_filter.get());
auto type_filter =
std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>(
new dynamorio::drmemtrace::type_filter_t({ TRACE_TYPE_ENCODING },
{ TRACE_MARKER_TYPE_FUNC_ID,
TRACE_MARKER_TYPE_FUNC_RETADDR,
TRACE_MARKER_TYPE_FUNC_ARG }));
if (k == 0) {
if (type_filter->get_error_string() != "") {
fprintf(stderr, "Couldn't construct a type_filter %s",
type_filter->get_error_string().c_str());
return false;
}
filters.push_back(type_filter.get());
}
// Construct record_filter_t.
auto record_filter = std::unique_ptr<test_record_filter_t>(
new test_record_filter_t(filters, /*stop_timestamp_us=*/0xabcdee));
void *shard_data =
record_filter->parallel_shard_init_stream(0, nullptr, stream.get());
if (!*record_filter) {
fprintf(stderr, "Filtering init failed\n");
return false;
}
// Proccess each trace entry.
for (int i = 0; i < static_cast<int>(entries.size()); ++i) {
bool input = true;
// We need to emulate the stream for the tool, and also
// skip the TRACE_MARKER_TYPE_FILTER_ENDPOINT entry which
// is supposed to be part of only the output, not the input.
if (entries[i].entry.type == TRACE_TYPE_MARKER) {
switch (entries[i].entry.size) {
case TRACE_MARKER_TYPE_TIMESTAMP:
stream->set_last_timestamp(entries[i].entry.addr);
break;
case TRACE_MARKER_TYPE_FILTER_ENDPOINT: input = false; break;
}
}
if (input &&
!record_filter->parallel_shard_memref(shard_data, entries[i].entry)) {
fprintf(stderr, "Filtering failed\n");
return false;
}
}
if (!record_filter->parallel_shard_exit(shard_data) || !*record_filter) {
fprintf(stderr, "Filtering exit failed\n");
return false;
}
// Check filtered output entries.
std::vector<trace_entry_t> filtered = record_filter->get_output_entries();
int j = 0;
for (int i = 0; i < static_cast<int>(entries.size()); ++i) {
if (entries[i].present[k]) {
if (j >= static_cast<int>(filtered.size())) {
fprintf(
stderr,
"Too few entries in filtered output (iter=%d). Expected: ", k);
print_entry(entries[i].entry);
fprintf(stderr, "\n");
return false;
}
// We do not verify encoding content for instructions.
if (memcmp(&filtered[j], &entries[i].entry, sizeof(trace_entry_t)) != 0) {
fprintf(stderr, "Wrong filter result for iter=%d. Expected: ", k);
print_entry(entries[i].entry);
fprintf(stderr, ", got: ");
print_entry(filtered[j]);
fprintf(stderr, "\n");
return false;
}
++j;
}
}
if (j < static_cast<int>(filtered.size())) {
fprintf(stderr,
"Got %d extra entries in filtered output (iter=%d). Next one: ",
static_cast<int>(filtered.size()) - j, k);
print_entry(filtered[j]);
fprintf(stderr, "\n");
return false;
}
}
fprintf(stderr, "test_cache_and_type_filter passed\n");
return true;
}
// Tests I/O for the record_filter.
static bool
test_null_filter()
{
std::string output_dir = op_tmp_output_dir.get_value() + DIRSEP + "null_filter";
......@@ -109,11 +332,12 @@ test_null_filter()
filter_funcs.push_back(null_filter);
record_analysis_tool_t *record_filter =
new dynamorio::drmemtrace::record_filter_t(output_dir, filter_funcs,
/*stop_timestamp_us=*/0,
/*verbosity=*/0);
std::vector<record_analysis_tool_t *> tools;
tools.push_back(record_filter);
record_analyzer_t record_analyzer(op_trace_dir.get_value(), &tools[0],
(int)tools.size());
static_cast<int>(tools.size()));
if (!record_analyzer) {
FATAL_ERROR("Failed to initialize record filter: %s",
record_analyzer.get_error_string().c_str());
......@@ -143,7 +367,7 @@ main(int argc, const char *argv[])
FATAL_ERROR("Usage error: %s\nUsage:\n%s", parse_err.c_str(),
droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str());
}
if (!test_null_filter())
if (!test_cache_and_type_filter() || !test_null_filter())
return 1;
fprintf(stderr, "All done!\n");
return 0;
......
......@@ -120,7 +120,7 @@ public:
std::string
run_test_helper(view_t &view, const std::vector<memref_t> &memrefs)
{
class local_stream_t : public memtrace_stream_t {
class local_stream_t : public test_memtrace_stream_t {
public:
local_stream_t(view_t &view, const std::vector<memref_t> &memrefs)
: view_(view)
......@@ -159,36 +159,6 @@ run_test_helper(view_t &view, const std::vector<memref_t> &memrefs)
{
return instr_count_;
}
std::string
get_stream_name() const override
{
return "";
}
uint64_t
get_version() const override
{
return 0;
}
uint64_t
get_filetype() const override
{
return 0;
}
uint64_t
get_cache_line_size() const override
{
return 0;
}
uint64_t
get_chunk_instr_count() const override
{
return 0;
}
uint64_t
get_page_size() const override
{
return 0;
}
private:
view_t &view_;
......
/* **********************************************************
* Copyright (c) 2022 Google, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of Google, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#ifndef _CACHE_FILTER_H_
#define _CACHE_FILTER_H_ 1
#include "record_filter.h"
#include "../simulator/cache_lru.h"
namespace dynamorio {
namespace drmemtrace {
// cache_stats_t subclass that additionally remembers whether the most recent
// access it was notified about was a hit.
class cache_filter_stats_t : public cache_stats_t {
public:
    cache_filter_stats_t(int block_size)
        : cache_stats_t(block_size)
    {
    }
    // Records the hit/miss outcome of this access, then defers to
    // cache_stats_t::access() for the regular bookkeeping.
    void
    access(const memref_t &memref, bool hit, caching_device_block_t *cache_block) override
    {
        did_last_access_hit_ = hit;
        cache_stats_t::access(memref, hit, cache_block);
    }
    // Returns whether the last access to the cache was a hit.
    // Returns false if no access has been recorded yet.
    bool
    did_last_access_hit() const
    {
        return did_last_access_hit_;
    }

private:
    bool did_last_access_hit_ = false;
};
// Record filter function that passes selected entries through a per-shard
// cache_lru_t and asks for an entry to be dropped when its access hit in that
// cache. Which entries go through the cache is chosen at construction:
// data entries (reads, writes, prefetches) and/or instruction entries.
class cache_filter_t : public record_filter_t::record_filter_func_t {
public:
    cache_filter_t(int cache_associativity, int cache_line_size, int cache_size,
                   bool filter_data, bool filter_instrs)
        : cache_associativity_(cache_associativity)
        , cache_line_size_(cache_line_size)
        , cache_size_(cache_size)
        , filter_data_(filter_data)
        , filter_instrs_(filter_instrs)
    {
    }
    // Creates the per-shard cache and its stats object.
    // Returns the per-shard state, or nullptr (with the error string set) if
    // the cache could not be initialized.
    void *
    parallel_shard_init(memtrace_stream_t *shard_stream) override
    {
        per_shard_t *per_shard = new per_shard_t;
        per_shard->stats = new cache_filter_stats_t(cache_line_size_);
        if (!(per_shard->cache.init(cache_associativity_, cache_line_size_, cache_size_,
                                    nullptr, per_shard->stats, nullptr))) {
            error_string_ = "Failed to initialize cache.";
            // Release the shard state (and, through it, the stats object)
            // rather than leaking both on this failure path.
            delete per_shard;
            return nullptr;
        }
        return per_shard;
    }
    // Returns false if the entry should be dropped (its access hit in the
    // cache) and true otherwise. Entries of types not selected for filtering
    // are never sent to the cache and always return true.
    bool
    parallel_shard_filter(const trace_entry_t &entry, void *shard_data) override
    {
        per_shard_t *per_shard = static_cast<per_shard_t *>(shard_data);
        // Shard initialization failed: there is no cache to consult, so do
        // not filter anything.
        if (per_shard == nullptr)
            return true;
        bool output = true;
        // We don't process flush entries here.
        if ((filter_data_ &&
             (entry.type == TRACE_TYPE_READ || entry.type == TRACE_TYPE_WRITE ||
              type_is_prefetch(static_cast<trace_type_t>(entry.type)))) ||
            (filter_instrs_ && type_is_instr(static_cast<trace_type_t>(entry.type)))) {
            // Zero the record first so that fields we do not set explicitly
            // hold defined values when handed to the cache.
            memref_t ref = {};
            ref.data.type = static_cast<trace_type_t>(entry.type);
            ref.data.size = entry.size;
            ref.data.addr = entry.addr;
            per_shard->cache.request(ref);
            output = !per_shard->stats->did_last_access_hit();
        }
        return output;
    }
    // Frees the per-shard state. A null shard_data (failed init) is accepted.
    bool
    parallel_shard_exit(void *shard_data) override
    {
        delete static_cast<per_shard_t *>(shard_data);
        return true;
    }

private:
    struct per_shard_t {
        per_shard_t() = default;
        // The stats object is released before the cache member is destroyed.
        ~per_shard_t()
        {
            delete stats;
        }
        per_shard_t(const per_shard_t &) = delete;
        per_shard_t &
        operator=(const per_shard_t &) = delete;

        cache_lru_t cache;
        // Owned by this struct; the same pointer is handed to cache.init().
        cache_filter_stats_t *stats = nullptr;
    };

    int cache_associativity_;
    int cache_line_size_;
    int cache_size_;
    bool filter_data_;
    bool filter_instrs_;
};
} // namespace drmemtrace
} // namespace dynamorio
#endif /* _CACHE_FILTER_H_ */
......@@ -41,7 +41,7 @@ namespace drmemtrace {
class null_filter_t : public record_filter_t::record_filter_func_t {
public:
void *
parallel_shard_init() override
parallel_shard_init(memtrace_stream_t *shard_stream) override
{
return nullptr;
}
......
......@@ -30,6 +30,7 @@
* DAMAGE.
*/
#include <iostream>
#include <fstream>
#include <memory>
#include <vector>
......@@ -60,10 +61,13 @@ namespace drmemtrace {
record_filter_t::record_filter_t(const std::string &output_dir,
const std::vector<record_filter_func_t *> &filters,
unsigned int verbose)
uint64_t stop_timestamp, unsigned int verbose)
: output_dir_(output_dir)
, filters_(filters)
, stop_timestamp_(stop_timestamp)
, verbosity_(verbose)
, input_entry_count_(0)
, output_entry_count_(0)
{
UNUSED(verbosity_);
UNUSED(output_prefix_);
......@@ -80,16 +84,18 @@ record_filter_t::parallel_shard_supported()
}
std::unique_ptr<std::ostream>
record_filter_t::get_writer(const std::string &path)
record_filter_t::get_writer(per_shard_t *per_shard, memtrace_stream_t *shard_stream)
{
per_shard->output_path = output_dir_ + DIRSEP + shard_stream->get_stream_name();
#ifdef HAS_ZLIB
if (ends_with(path, ".gz")) {
VPRINT(this, 3, "Using the gzip writer for %s\n", path.c_str());
return std::unique_ptr<std::ostream>(new gzip_ostream_t(path));
if (ends_with(per_shard->output_path, ".gz")) {
VPRINT(this, 3, "Using the gzip writer for %s\n", per_shard->output_path.c_str());
return std::unique_ptr<std::ostream>(new gzip_ostream_t(per_shard->output_path));
}
#endif
VPRINT(this, 3, "Using the default writer for %s\n", path.c_str());
return std::unique_ptr<std::ostream>(new std::ofstream(path, std::ofstream::binary));
VPRINT(this, 3, "Using the default writer for %s\n", per_shard->output_path.c_str());
return std::unique_ptr<std::ostream>(
new std::ofstream(per_shard->output_path, std::ofstream::binary));
}
void *
......@@ -97,14 +103,20 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data,
memtrace_stream_t *shard_stream)
{
auto per_shard = new per_shard_t;
per_shard->output_path = output_dir_ + DIRSEP + shard_stream->get_stream_name();
per_shard->writer = get_writer(per_shard->output_path);
per_shard->writer = get_writer(per_shard, shard_stream);
per_shard->shard_stream = shard_stream;
per_shard->enabled = true;
if (!per_shard->writer) {
per_shard->error = "Could not open a writer for " + per_shard->output_path;
success_ = false;
}
for (record_filter_func_t *f : filters_) {
filter_shard_data_.push_back(f->parallel_shard_init());
per_shard->filter_shard_data.push_back(f->parallel_shard_init(shard_stream));
if (f->get_error_string() != "") {
per_shard->error =
"Failure in initializing filter function " + f->get_error_string();
success_ = false;
}
}
return reinterpret_cast<void *>(per_shard);
}
......@@ -112,12 +124,15 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data,
bool
record_filter_t::parallel_shard_exit(void *shard_data)
{
per_shard_t *per_shard = reinterpret_cast<per_shard_t *>(shard_data);
input_entry_count_ += per_shard->input_entry_count;
output_entry_count_ += per_shard->output_entry_count;
bool res = true;
for (int i = 0; i < (int)filters_.size(); ++i) {
if (!filters_[i]->parallel_shard_exit(filter_shard_data_[i]))
for (int i = 0; i < static_cast<int>(filters_.size()); ++i) {
if (!filters_[i]->parallel_shard_exit(per_shard->filter_shard_data[i]))
res = false;
}
delete reinterpret_cast<per_shard_t *>(shard_data);
delete per_shard;
return res;
}
......@@ -128,23 +143,74 @@ record_filter_t::parallel_shard_error(void *shard_data)
return per_shard->error;
}
// Writes one entry to the shard's output stream as raw bytes.
// On success bumps the shard's output entry count and returns true; on failure
// records an error on the shard, marks the tool as failed, and returns false.
bool
record_filter_t::write_trace_entry(per_shard_t *shard, const trace_entry_t &entry)
{
    const char *raw_bytes = reinterpret_cast<const char *>(&entry);
    if (shard->writer->write(raw_bytes, sizeof(entry))) {
        ++shard->output_entry_count;
        return true;
    }
    shard->error = "Failed to write to output file " + shard->output_path;
    success_ = false;
    return false;
}
bool
record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &entry)
{
per_shard_t *per_shard = reinterpret_cast<per_shard_t *>(shard_data);
++per_shard->input_entry_count;
bool output = true;
for (int i = 0; i < (int)filters_.size(); ++i) {
if (!filters_[i]->parallel_shard_filter(entry, filter_shard_data_[i]))
if (per_shard->enabled && stop_timestamp_ != 0 &&
per_shard->shard_stream->get_last_timestamp() >= stop_timestamp_) {
per_shard->enabled = false;
trace_entry_t filter_boundary_entry = { TRACE_TYPE_MARKER,
TRACE_MARKER_TYPE_FILTER_ENDPOINT,
{ 0 } };
if (!write_trace_entry(per_shard, filter_boundary_entry))
return false;
}
if (per_shard->enabled) {
for (int i = 0; i < static_cast<int>(filters_.size()); ++i) {
if (!filters_[i]->parallel_shard_filter(entry,
per_shard->filter_shard_data[i]))
output = false;
}
}
// Optimize space by outputting the unit header only if we are outputting something
// from that unit.
if (entry.type == TRACE_TYPE_MARKER) {
switch (entry.size) {
case TRACE_MARKER_TYPE_TIMESTAMP:
// No need to remember the previous unit's header anymore. We're in the
// next unit now.
// XXX: it may happen that we never output a unit header due to this
// optimization. We should ensure that we output it at least once. We
// skip handling this corner case for now.
per_shard->last_filtered_unit_header.clear();
ANNOTATE_FALLTHROUGH;
case TRACE_MARKER_TYPE_WINDOW_ID:
case TRACE_MARKER_TYPE_CPU_ID:
if (output)
per_shard->last_filtered_unit_header.push_back(entry);
output = false;
}
}
// Since we're outputting something from this unit, output its unit header.
if (output && !per_shard->last_filtered_unit_header.empty()) {
for (trace_entry_t &unit_header_entry : per_shard->last_filtered_unit_header) {
if (!write_trace_entry(per_shard, unit_header_entry))
return false;
}
per_shard->last_filtered_unit_header.clear();
}
// XXX i#5675: Currently we support writing to a single output file, but we may
// want to write to multiple in the same run; e.g. splitting a trace. For now,
// we can simply run the tool multiple times, but it can be made more efficient.
if (output && !per_shard->writer->write((char *)&entry, sizeof(entry))) {
per_shard->error = "Failed to write to output file " + per_shard->output_path;
success_ = false;
if (output && !write_trace_entry(per_shard, entry))
return false;
}
return true;
}
......@@ -159,7 +225,8 @@ record_filter_t::process_memref(const trace_entry_t &memref)
bool
record_filter_t::print_results()
{
// TODO i#5675: Print stats about filtered entries.
std::cerr << "Output " << output_entry_count_ << " entries from "
<< input_entry_count_ << " entries.\n";
return true;
}
......
......@@ -66,7 +66,7 @@ public:
* This routine can be used to initialize state for each shard.
*/
virtual void *
parallel_shard_init() = 0;
parallel_shard_init(memtrace_stream_t *shard_stream) = 0;
/**
* Invoked for each #trace_entry_t in the shard. It returns
* whether or not this \p entry should be included in the result
......@@ -83,11 +83,23 @@ public:
*/
virtual bool
parallel_shard_exit(void *shard_data) = 0;
/**
* Returns the error string. If no error occurred, it will be empty.
*/
std::string
get_error_string()
{
return error_string_;
}
protected:
std::string error_string_;
};
record_filter_t(const std::string &output_dir,
const std::vector<record_filter_func_t *> &filters,
unsigned int verbose);
uint64_t stop_timestamp, unsigned int verbose);
~record_filter_t() override;
bool
process_memref(const trace_entry_t &entry) override;
......@@ -105,21 +117,33 @@ public:
std::string
parallel_shard_error(void *shard_data) override;
private:
protected:
// State kept for each shard being filtered.
struct per_shard_t {
    // Path of the output file for this shard.
    std::string output_path;
    // Stream that output entries are written to.
    std::unique_ptr<std::ostream> writer;
    // Description of the most recent error on this shard, if any.
    std::string error;
    // One opaque per-shard pointer per filter function, in the same order as
    // the filters.
    std::vector<void *> filter_shard_data;
    // Unit header entries held back until something from their unit is output.
    std::vector<trace_entry_t> last_filtered_unit_header;
    // The counters are incremented while processing entries, so they must
    // start at zero: default-initializing this struct would otherwise leave
    // them indeterminate.
    uint64_t input_entry_count = 0;
    uint64_t output_entry_count = 0;
    // Stream this shard reads from; queried for the last timestamp.
    memtrace_stream_t *shard_stream = nullptr;
    // Whether the filters are still being applied on this shard.
    bool enabled = true;
};
std::unique_ptr<std::ostream>
get_writer(const std::string &path);
private:
virtual bool
write_trace_entry(per_shard_t *shard, const trace_entry_t &entry);
virtual std::unique_ptr<std::ostream>
get_writer(per_shard_t *per_shard, memtrace_stream_t *shard_stream);
std::string output_dir_;
std::vector<record_filter_func_t *> filters_;
std::vector<void *> filter_shard_data_;
uint64_t stop_timestamp_;
unsigned int verbosity_;
const char *output_prefix_ = "[record_filter]";
uint64_t input_entry_count_;
uint64_t output_entry_count_;
};
} // namespace drmemtrace
......
/* **********************************************************
* Copyright (c) 2022 Google, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of Google, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#ifndef _TYPE_FILTER_H_
#define _TYPE_FILTER_H_ 1
#include "record_filter.h"
#include "trace_entry.h"
#include <vector>
#include <unordered_set>
#ifdef MACOS
// Provide trace_type_t and trace_marker_type_t specializations to
// allow declaration of unordered_set<_> below. This was needed for
// OSX particularly. In C++14, std::hash works as expected for enums
// too: https://cplusplus.com/forum/general/238538/.
namespace std {
// Hashes a trace_type_t as its numeric value.
template <> struct hash<trace_type_t> {
    size_t
    operator()(trace_type_t type) const
    {
        return static_cast<size_t>(type);
    }
};
// Hashes a trace_marker_type_t as its numeric value.
template <> struct hash<trace_marker_type_t> {
    size_t
    operator()(trace_marker_type_t marker_type) const
    {
        return static_cast<size_t>(marker_type);
    }
};
} // namespace std
#endif
namespace dynamorio {
namespace drmemtrace {
// Record filter function that rejects trace entries based on their type.
//
// Two removal sets are maintained: one of trace entry types and one of
// marker types. An entry is rejected (parallel_shard_filter() returns false)
// when its type is in the first set, or when it is a TRACE_TYPE_MARKER entry
// whose marker type is in the second set.
class type_filter_t : public record_filter_t::record_filter_func_t {
public:
    // Builds the two removal sets from the given lists. Duplicate values in
    // the input vectors collapse to a single set element.
    type_filter_t(std::vector<trace_type_t> remove_trace_types,
                  std::vector<trace_marker_type_t> remove_marker_types)
        : remove_trace_types_(remove_trace_types.begin(), remove_trace_types.end())
        , remove_marker_types_(remove_marker_types.begin(), remove_marker_types.end())
    {
    }

    // This filter keeps no per-shard state, so there is nothing to allocate.
    void *
    parallel_shard_init(memtrace_stream_t *shard_stream) override
    {
        return nullptr;
    }

    // Returns false when the entry matches one of the removal sets and true
    // otherwise. shard_data is not used.
    bool
    parallel_shard_filter(const trace_entry_t &entry, void *shard_data) override
    {
        const auto entry_type = static_cast<trace_type_t>(entry.type);
        if (remove_trace_types_.count(entry_type) != 0)
            return false;
        // Only marker entries are subject to the marker-type check.
        if (entry.type != TRACE_TYPE_MARKER)
            return true;
        // For marker entries the size field is interpreted as the marker type.
        const auto marker_type = static_cast<trace_marker_type_t>(entry.size);
        return remove_marker_types_.count(marker_type) == 0;
    }

    // Nothing to release for a shard; always reports success.
    bool
    parallel_shard_exit(void *shard_data) override
    {
        return true;
    }

private:
    // Trace entry types to reject.
    std::unordered_set<trace_type_t> remove_trace_types_;
    // Marker types to reject; consulted only for TRACE_TYPE_MARKER entries.
    std::unordered_set<trace_marker_type_t> remove_marker_types_;
};
} // namespace drmemtrace
} // namespace dynamorio
#endif /* _TYPE_FILTER_H_ */
......@@ -36,6 +36,8 @@
#include "droption.h"
#include "dr_frontend.h"
#include "tools/filter/null_filter.h"
#include "tools/filter/cache_filter.h"
#include "tools/filter/type_filter.h"
#include "tools/filter/record_filter.h"
#define FATAL_ERROR(msg, ...) \
......@@ -59,6 +61,48 @@ static droption_t<unsigned int> op_verbose(DROPTION_SCOPE_ALL, "verbose", 0, 0,
"Verbosity level",
"Verbosity level for notifications.");
// Command-line options controlling which filters the launcher sets up.

// -stop_timestamp: timestamp threshold after which filtering stops. Its value
// is forwarded to the record_filter_t constructor in _tmain.
// NOTE(review): the numeric arguments presumably follow the same
// (default, min, max) order as op_verbose, i.e. default 0 with range
// [0, UINT64_MAX] -- confirm against droption.h.
static droption_t<uint64_t>
op_stop_timestamp(DROPTION_SCOPE_ALL, "stop_timestamp", 0, 0,
std::numeric_limits<uint64_t>::max(),
"Timestamp (in us) in the trace when to stop filtering.",
"Record filtering will be disabled (everything will be output) "
"when the tool sees a TRACE_MARKER_TYPE_TIMESTAMP marker with "
"timestamp greater than the specified value.");
// -cache_filter_size: when specified, _tmain adds a cache_filter_t of this
// size with associativity 1 and a 64-byte line size that filters data but not
// instructions.
// NOTE(review): the short description says "[Required]", but _tmain only
// creates the cache filter when the option is specified, so it appears to be
// optional -- confirm the intended wording.
static droption_t<int> op_cache_filter_size(
DROPTION_SCOPE_FRONTEND, "cache_filter_size", 0,
"[Required] Enable data cache filter with given size (in bytes).",
"Enable data cache filter with given size (in bytes), with 64 byte "
"line size and a direct mapped LRU cache.");
// -remove_trace_types: comma-separated integers that _tmain converts with
// parse_string<trace_type_t>() and passes to type_filter_t. The type filter is
// added when either this option or -remove_marker_types is specified.
// NOTE(review): marked "[Required]" but treated as optional by _tmain.
static droption_t<std::string> op_remove_trace_types(
DROPTION_SCOPE_FRONTEND, "remove_trace_types", "",
"[Required] Comma-separated integers for trace types to remove.",
"Comma-separated integers for trace types to remove. "
"See trace_type_t for the list of trace entry types.");
// -remove_marker_types: comma-separated integers that _tmain converts with
// parse_string<trace_marker_type_t>() and passes to type_filter_t together
// with the trace types above.
// NOTE(review): marked "[Required]" but treated as optional by _tmain.
static droption_t<std::string> op_remove_marker_types(
DROPTION_SCOPE_FRONTEND, "remove_marker_types", "",
"[Required] Comma-separated integers for marker types to remove.",
"Comma-separated integers for marker types to remove. "
"See trace_marker_type_t for the list of marker types.");
// Splits s on sep and converts each token to T.
//
// Each token is parsed as a decimal integer with std::stoi and then
// static_cast to T, so T must be constructible from int via static_cast
// (e.g. an integral or enum type).
//
// Returns an empty vector when s is empty. Otherwise returns one element per
// token, in order of appearance.
//
// Throws whatever std::stoi throws: std::invalid_argument for a token that
// does not start with an integer (including an empty token, e.g. from a
// trailing or doubled separator), and std::out_of_range for a value that does
// not fit in an int.
template <typename T>
std::vector<T>
parse_string(const std::string &s, char sep = ',')
{
    std::vector<T> vec;
    if (s.empty())
        return vec;
    size_t at = 0;
    while (true) {
        const size_t pos = s.find(sep, at);
        // substr() takes a character count, not an end position, so the token
        // length must be computed relative to its start. For the last token
        // there is no separator and the rest of the string is taken.
        const size_t len =
            (pos == std::string::npos) ? std::string::npos : pos - at;
        vec.push_back(static_cast<T>(std::stoi(s.substr(at, len))));
        if (pos == std::string::npos)
            break;
        at = pos + 1;
    }
    return vec;
}
int
_tmain(int argc, const TCHAR *targv[])
{
......@@ -75,17 +119,41 @@ _tmain(int argc, const TCHAR *targv[])
droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str());
}
dynamorio::drmemtrace::record_filter_t::record_filter_func_t *null_filter =
new dynamorio::drmemtrace::null_filter_t();
std::vector<dynamorio::drmemtrace::record_filter_t::record_filter_func_t *>
std::vector<
std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>>
filter_funcs;
filter_funcs.push_back(null_filter);
if (op_cache_filter_size.specified()) {
filter_funcs.emplace_back(
std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>(
// XXX: add more command-line options to allow the user to set these
// parameters.
new dynamorio::drmemtrace::cache_filter_t(
/*cache_associativity=*/1, /*cache_line_size=*/64,
op_cache_filter_size.get_value(),
/*filter_data=*/true, /*filter_instrs=*/false)));
}
if (op_remove_trace_types.specified() || op_remove_marker_types.specified()) {
std::vector<trace_type_t> filter_trace_types =
parse_string<trace_type_t>(op_remove_trace_types.get_value());
std::vector<trace_marker_type_t> filter_marker_types =
parse_string<trace_marker_type_t>(op_remove_marker_types.get_value());
filter_funcs.emplace_back(
std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>(
new dynamorio::drmemtrace::type_filter_t(filter_trace_types,
filter_marker_types)));
}
std::vector<dynamorio::drmemtrace::record_filter_t::record_filter_func_t *>
filter_func_ptrs;
for (auto &f : filter_funcs)
filter_func_ptrs.push_back(f.get());
// TODO i#5675: Add other filters.
record_analysis_tool_t *record_filter = new dynamorio::drmemtrace::record_filter_t(
op_output_dir.get_value(), filter_funcs, op_verbose.get_value());
auto record_filter = std::unique_ptr<record_analysis_tool_t>(
new dynamorio::drmemtrace::record_filter_t(
op_output_dir.get_value(), filter_func_ptrs, op_stop_timestamp.get_value(),
op_verbose.get_value()));
std::vector<record_analysis_tool_t *> tools;
tools.push_back(record_filter);
tools.push_back(record_filter.get());
record_analyzer_t record_analyzer(op_trace_dir.get_value(), &tools[0],
(int)tools.size());
......@@ -98,8 +166,6 @@ _tmain(int argc, const TCHAR *targv[])
record_analyzer.get_error_string().c_str());
}
record_analyzer.print_stats();
delete null_filter;
delete record_filter;
fprintf(stderr, "Done!\n");
return 0;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment