diff --git a/README.md b/README.md index 512e46d..8c78d6e 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ To see the TensorBoard visualization of the training progress: Optionally edit the `hparams.yaml` configuration file in the folder `egs/covers80/config` before starting a training run. -This fork added an hparam.yaml setting of `early_stopping_patience` to support the added feature of early stopping (original CoverHunter defaulted to 10,000 epochs!). +This fork added the hyperparameter `early_stopping_patience` to support early stopping (original CoverHunter defaulted to 10,000 epochs!). Note: Don't use the `torchrun` launch command offered in original CoverHunter. In the single-computer Apple Silicon context, it is not only irrelevant, it actually slows down performance. In my tests it slowed down tools.train performance by about 20%. @@ -90,7 +90,6 @@ This figure shows the results of training from scratch on the covers80 dataset w ![t-SNE plot for Covers80](tSNE-example.png) The optional `dist_name` argument is a path where you want to save the distance matrix and ref labels so that you can study the results separately, such as perhaps doing custom t-SNE plots, etc. - See the "Training checkpoint output" section below for a description of the embeddings saved by the `eval_for_map_with_feat()` function called in this script. They are saved in a new subfolder of the `pretrained_model` folder named `embed_NN_tmp` where NN is the highest-numbered epoch subfolder in the `pretrained_model` folder. 
diff --git a/src/eval_testset.py b/src/eval_testset.py index a5a4196..9f1fffd 100644 --- a/src/eval_testset.py +++ b/src/eval_testset.py @@ -40,7 +40,6 @@ def _cluster_plot(dist_matrix, ref_labels, output_path, test_only_labels=[],logg marker_styles = ['o', 's', '^', 'p', 'x', 'D'] num_colors = len(unique_labels) // len(marker_styles) colors = plt.get_cmap(cmap_name, num_colors)(range(num_colors)) - plt.figure(figsize=(15, 15)) color_dict = {} # Dictionary to store color for each label @@ -84,6 +83,7 @@ def _cluster_plot(dist_matrix, ref_labels, output_path, test_only_labels=[],logg if test_only_labels: plt.text(1, 1.02, "Circles = song_ids not seen in training", ha='right', va='bottom', transform=plt.gca().transAxes) + plt.tight_layout() plt.savefig(output_path) plt.close() diff --git a/tSNE.png b/tSNE.png new file mode 100644 index 0000000..736b3dd Binary files /dev/null and b/tSNE.png differ diff --git a/tools/eval_testset.py b/tools/eval_testset.py index 96bf618..49dc814 100644 --- a/tools/eval_testset.py +++ b/tools/eval_testset.py @@ -23,8 +23,8 @@ def _main(): parser.add_argument('query_path') parser.add_argument('ref_path') parser.add_argument('-query_in_ref_path', default='', type=str) - parser.add_argument('-test_only_labels', default='', type=str, help='Path to list of song_ids reserved for test dataset for use in t-SNE plot.') parser.add_argument('-plot_name', default='', type=str, help='Save a t-SNE plot of the distance matrix to this path') + parser.add_argument('-test_only_labels', default='', type=str, help='Path to list of song_ids reserved for test dataset for use in t-SNE plot.') parser.add_argument('-dist_name', default='', type=str, help='Save the distance matrix to this path') args = parser.parse_args()