% clai_unconf23.bib
@Proceedings{CLAI-Unconf-2023,
booktitle = {Proceedings of the 1st ContinualAI Unconference, 2023},
name = {ContinualAI Unconference},
shortname = {CLAI Unconf},
editor = {Swaroop, Siddharth and Mundt, Martin and Aljundi, Rahaf and Khan, Mohammad Emtiyaz},
volume = {249},
year = {2023},
start = {2023-10-09},
end = {2023-10-09},
published = {2024-08-14},
conference_url = {https://unconf.continualai.org/},
address = {Virtual},
conference_number = {1}
}
@InProceedings{semola23,
title = {Adaptive Hyperparameter Optimization for Continual Learning Scenarios},
author = {Semola, Rudy and Hurtado, Julio and Lomonaco, Vincenzo and Bacciu, Davide},
pages = {1-14},
abstract = {Hyperparameter selection in continual learning scenarios is a challenging and underexplored aspect, especially in practical non-stationary environments. Traditional approaches, such as grid searches with held-out validation data from all tasks, are unrealistic for building accurate lifelong learning systems. This paper aims to explore the role of hyperparameter selection in continual learning and the necessity of continually and automatically tuning them according to the complexity of the task at hand. Hence, we propose leveraging the nature of sequential task learning to improve Hyperparameter Optimization efficiency. By using functional analysis of variance-based techniques, we identify the most crucial hyperparameters that have an impact on performance. We demonstrate empirically that this approach, agnostic to continual scenarios and strategies, allows us to speed up hyperparameter optimization continually across tasks and exhibits robustness even in the face of varying sequential task orders. We believe that our findings can contribute to the advancement of continual learning methodologies towards more efficient, robust, and adaptable models for real-world applications.},
openreview = {ZWrG1YlEMY}
}
@InProceedings{yildirim23,
title = {AdaCL: Adaptive Continual Learning},
author = {Yildirim, Elif Ceren Gok and Yildirim, Murat Onur and Kilickaya, Mert and Vanschoren, Joaquin},
pages = {15-24},
abstract = {Class-Incremental Learning aims to update a deep classifier to learn new categories while maintaining or improving its accuracy on previously observed classes. Common methods to prevent forgetting previously learned classes include regularizing the neural network updates and storing exemplars in memory, which come with hyperparameters such as the learning rate, regularization strength, or the number of exemplars. However, these hyperparameters are usually only tuned at the start and then kept fixed throughout the learning sessions, ignoring the fact that newly encountered tasks may have varying levels of novelty or difficulty. This study investigates the necessity of hyperparameter 'adaptivity' in Class-Incremental Learning: the ability to dynamically adjust hyperparameters such as the learning rate, regularization strength, and memory size according to the properties of the new task at hand. We propose AdaCL, a Bayesian Optimization-based approach to automatically and efficiently determine the optimal values for those parameters with each learning task. We show that adapting hyperparameters on each new task leads to improvements in accuracy, forgetting, and memory. Code is available at https://github.com/ElifCerenGokYildirim/AdaCL.},
openreview = {Q6nDTJVGCm}
}
@InProceedings{castellana23,
title = {CD-IMM: The Benefits of Domain-based Mixture Models in Bayesian Continual Learning},
author = {Castellana, Daniele and Carta, Antonio and Bacciu, Davide},
pages = {25-36},
abstract = {Real-world streams of data are characterised by the continuous occurrence of new and old classes, possibly on novel domains. Bayesian non-parametric mixture models provide a natural solution for continual learning due to their ability to create new components on the fly when new data are observed. However, popular class-based and time-based mixtures are often tested on simplified streams (e.g., class-incremental), where shortcuts can be exploited to infer drifts. We hypothesise that domain-based mixtures are more effective on natural streams. Our proposed method, the CD-IMM, exemplifies this approach by learning an infinite mixture of domains for each class. We experiment on a natural scenario with a mix of class repetitions and novel domains to validate our hypothesis. The experimental results confirm our hypothesis and we find that CD-IMM beats state-of-the-art Bayesian continual learning methods.},
openreview = {0VOYqOiSSk}
}
@InProceedings{hess23,
title = {Two Complementary Perspectives to Continual Learning: Ask Not Only What to Optimize, But Also How},
author = {Hess, Timm and Tuytelaars, Tinne and van de Ven, Gido M},
pages = {37-61},
abstract = {Recent years have seen considerable progress in the continual training of deep neural networks, predominantly thanks to approaches that add replay or regularization terms to the loss function to approximate the joint loss over all tasks so far. However, we show that even with a perfect approximation to the joint loss, these approaches still suffer from temporary but substantial forgetting when starting to train on a new task. Motivated by this `stability gap', we propose that continual learning strategies should focus not only on the optimization objective, but also on the way this objective is optimized. While there is some continual learning work that alters the optimization trajectory (e.g., using gradient projection techniques), this line of research is positioned as an alternative to improving the optimization objective, while we argue it should be complementary. In search of empirical support for our proposition, we perform a series of pre-registered experiments combining replay-approximated joint objectives with gradient projection-based optimization routines. However, this first experimental attempt fails to show clear and consistent benefits. Nevertheless, our conceptual arguments, as well as some of our empirical results, demonstrate the distinctive importance of the optimization trajectory in continual learning, thereby opening up a new direction for continual learning research.},
openreview = {urLZuoayaD}
}
@InProceedings{malherbe23,
title = {Implicit Neural Representation as vectorizer for classification task applied to diverse data structures},
author = {Malherbe, Thibault},
pages = {62-76},
abstract = {Implicit neural representations have recently emerged as a promising tool in data science research for their ability to learn complex, high-dimensional functions without requiring explicit equations or hand-crafted features. Here we aim to use these implicit neural representation weights to represent batches of data and to classify these batches based only on these weights, without any feature engineering on the raw data. In this study, we demonstrate that this method yields very promising results for data classification across several types of data, such as sound, images, videos or human activities, without any prior knowledge of the related field.},
openreview = {VTvytANXYq}
}
@InProceedings{aswani23,
title = {Examining Changes in Internal Representations of Continual Learning Models Through Tensor Decomposition},
author = {Aswani, Nishant Suresh and Guesmi, Amira and Hanif, Muhammad Abdullah and Shafique, Muhammad},
pages = {62-82},
abstract = {Continual learning (CL) has spurred the development of several methods aimed at consolidating previous knowledge across sequential learning. Yet, the evaluations of these methods have primarily focused on the final output, such as changes in the accuracy of predicted classes, overlooking the issue of representational forgetting within the model. In this paper, we propose a novel representation-based evaluation framework for CL models. This approach involves gathering internal representations from throughout the continual learning process and formulating three-dimensional tensors. The tensors are formed by stacking representations, such as layer activations, generated from several inputs and model `snapshots', throughout the learning process. By conducting tensor component analysis (TCA), we aim to uncover meaningful patterns about how the internal representations evolve, expecting to highlight the merits or shortcomings of examined CL strategies. We conduct our analyses across different model architectures and importance-based continual learning strategies, with a curated task selection. Often, the results of our approach mirror the difference in performance of various CL strategies on various architectures. Ultimately, however, we found that our methodology did not directly highlight specialized clusters of neurons, nor provide an immediate understanding of the evolution of filters. We believe a scaled-down variation of our approach will provide insight into the benefits and pitfalls of using TCA to study continual learning dynamics.},
openreview = {pyjLrj4o8y}
}