Skip to content

Commit

Permalink
Add new dimension error_message to file logger metrics.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 615767732
  • Loading branch information
tensorflower-gardener committed Mar 14, 2024
1 parent 7e77bc3 commit 9e4fd08
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 18 deletions.
29 changes: 16 additions & 13 deletions tensorflow/core/framework/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,12 @@ auto* tf_data_filename_counter = tsl::monitoring::Counter<2>::New(

auto* tf_data_file_logger_attempts_counter = tsl::monitoring::Counter<0>::New(
"/tensorflow/data/file_logger_attempts",
"The number of times a file logger attempted to log "
"filenames.");
"The number of times a file logger attempted to log filenames.");

auto* tf_data_file_logger_errors_counter = tsl::monitoring::Counter<1>::New(
auto* tf_data_file_logger_errors_counter = tsl::monitoring::Counter<2>::New(
"/tensorflow/data/file_logger_errors",
"The number of times file logger got error of this type occurred with "
"this ",
"status_code");
"The number of times file logger got error of this type and message.",
"error_code", "error_message");

auto* tf_data_file_logger_attempted_num_files_counter =
tsl::monitoring::Counter<0>::New(
Expand All @@ -291,11 +289,11 @@ auto* tf_data_file_logger_attempted_num_files_counter =
"logger.");

auto* tf_data_file_logger_errors_num_files_counter =
tsl::monitoring::Counter<1>::New(
tsl::monitoring::Counter<2>::New(
"/tensorflow/data/file_logger_errors_num_files",
"The number of files that encountered errors of this type and code "
"The number of files that encountered errors of this type and message "
"during logging by the file logger.",
"status_code");
"error_code", "error_message");

auto* tf_data_model_gauge =
tsl::monitoring::Gauge<std::function<std::string()>, 1>::New(
Expand Down Expand Up @@ -698,8 +696,10 @@ void RecordTFDataFileLoggerAttempts() {
tf_data_file_logger_attempts_counter->GetCell()->IncrementBy(1);
}

void RecordTFDataFileLoggerErrors(error::Code code) {
tf_data_file_logger_errors_counter->GetCell(error::Code_Name(code))
// Adds one to the file-logger errors counter cell selected by the name of
// `error_code` and by `error_message`.
void RecordTFDataFileLoggerErrors(error::Code error_code,
                                  const string& error_message) {
  auto* errors_cell = tf_data_file_logger_errors_counter->GetCell(
      error::Code_Name(error_code), error_message);
  errors_cell->IncrementBy(1);
}

Expand All @@ -708,8 +708,11 @@ void RecordTFDataFileLoggerAttemptedNumFiles(size_t num_files) {
num_files);
}

void RecordTFDataFileLoggerErrorsNumFiles(size_t num_files, error::Code code) {
tf_data_file_logger_errors_num_files_counter->GetCell(error::Code_Name(code))
void RecordTFDataFileLoggerErrorsNumFiles(size_t num_files,
error::Code error_code,
const string& error_message) {
tf_data_file_logger_errors_num_files_counter
->GetCell(error::Code_Name(error_code), error_message)
->IncrementBy(num_files);
}

Expand Down
14 changes: 9 additions & 5 deletions tensorflow/core/framework/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,15 +200,19 @@ void RecordTFDataFilename(const string& name, const string& filename);
// Records the total attempts made by file logger.
void RecordTFDataFileLoggerAttempts();

// Records the total errors encountered by file logger with this error code.
void RecordTFDataFileLoggerErrors(error::Code code);
// Records an error of type `code` with message `error_message` encountered by
// file logger.
void RecordTFDataFileLoggerErrors(error::Code code,
const string& error_message);

// Records the total number of files attempted to be logged by file logger.
void RecordTFDataFileLoggerAttemptedNumFiles(size_t num_files);

// Records the total number of files that encountered errors during logging by
// file logger with this error code.
void RecordTFDataFileLoggerErrorsNumFiles(size_t num_files, error::Code code);
// Records the number of files that encountered an error of type `code` with
// message `error_message` during logging by file logger.
void RecordTFDataFileLoggerErrorsNumFiles(size_t num_files, error::Code code,
const string& error_message);

// Records statistics of tf.data auto sharding.
//
Expand Down

0 comments on commit 9e4fd08

Please sign in to comment.