Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finetuning X3D model on Hagrid dataset #129

Merged
merged 6 commits into from
Dec 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
21,336 changes: 21,336 additions & 0 deletions pytorchvideo/lightning_logs/HaGrid/aggregated_logs/all_training_logs_in_one_file.csv

Large diffs are not rendered by default.

128 changes: 128 additions & 0 deletions pytorchvideo/lightning_logs/HaGrid/aggregated_logs/tflogs2pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env python3

import glob
import os
import pprint
import traceback

import click
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator


# Extraction function
def tflog2pandas(path: str) -> pd.DataFrame:
"""convert single tensorflow log file to pandas DataFrame

Parameters
----------
path : str
path to tensorflow log file

Returns
-------
pd.DataFrame
converted dataframe
"""
DEFAULT_SIZE_GUIDANCE = {
"compressedHistograms": 1,
"images": 1,
"scalars": 0, # 0 means load all
"histograms": 1,
}
runlog_data = pd.DataFrame({"metric": [], "value": [], "step": []})
try:
event_acc = EventAccumulator(path, DEFAULT_SIZE_GUIDANCE)
event_acc.Reload()
tags = event_acc.Tags()["scalars"]
for tag in tags:
event_list = event_acc.Scalars(tag)
values = list(map(lambda x: x.value, event_list))
step = list(map(lambda x: x.step, event_list))
r = {"metric": [tag] * len(step), "value": values, "step": step}
r = pd.DataFrame(r)
runlog_data = pd.concat([runlog_data, r])
# Dirty catch of DataLossError
except Exception:
print("Event file possibly corrupt: {}".format(path))
traceback.print_exc()
return runlog_data


def many_logs2pandas(event_paths):
all_logs = pd.DataFrame()
for path in event_paths:
log = tflog2pandas(path)
if log is not None:
if all_logs.shape[0] == 0:
all_logs = log
else:
all_logs = all_logs.append(log, ignore_index=True)
return all_logs


@click.command()
@click.argument("logdir-or-logfile")
@click.option(
"--write-pkl/--no-write-pkl", help="save to pickle file or not", default=False
)
@click.option(
"--write-csv/--no-write-csv", help="save to csv file or not", default=True
)
@click.option("--out-dir", "-o", help="output directory", default=".")
def main(logdir_or_logfile: str, write_pkl: bool, write_csv: bool, out_dir: str):
"""This is a enhanced version of
https://gist.github.com/ptschandl/ef67bbaa93ec67aba2cab0a7af47700b

This script exctracts variables from all logs from tensorflow event
files ("event*"),
writes them to Pandas and finally stores them a csv-file or
pickle-file including all (readable) runs of the logging directory.

Example usage:

# create csv file from all tensorflow logs in provided directory (.)
# and write it to folder "./converted"
tflogs2pandas.py . --write-csv --no-write-pkl --o converted

# creaste csv file from tensorflow logfile only and write into
# and write it to folder "./converted"
tflogs2pandas.py tflog.hostname.12345 --write-csv --no-write-pkl --o converted
"""
pp = pprint.PrettyPrinter(indent=4)
if os.path.isdir(logdir_or_logfile):
# Get all event* runs from logging_dir subdirectories
event_paths = glob.glob(os.path.join(logdir_or_logfile, "event*"))
elif os.path.isfile(logdir_or_logfile):
event_paths = [logdir_or_logfile]
else:
raise ValueError(
"input argument {} has to be a file or a directory".format(
logdir_or_logfile
)
)
# Call & append
if event_paths:
pp.pprint("Found tensorflow logs to process:")
pp.pprint(event_paths)
all_logs = many_logs2pandas(event_paths)
pp.pprint("Head of created dataframe")
pp.pprint(all_logs.head())

os.makedirs(out_dir, exist_ok=True)
if write_csv:
print("saving to csv file")
out_file = os.path.join(out_dir, "all_training_logs_in_one_file.csv")
print(out_file)
all_logs.to_csv(out_file, index=None)
if write_pkl:
print("saving to pickle file")
out_file = os.path.join(out_dir, "all_training_logs_in_one_file.pkl")
print(out_file)
all_logs.to_pickle(out_file)
else:
print("No event paths have been found.")


if __name__ == "__main__":
main()
Loading