@Proceedings{neurreps2023,
booktitle = {Proceedings of the 2nd NeurIPS Workshop on Symmetry and Geometry in Neural Representations},
name = {NeurIPS Workshop on Symmetry and Geometry in Neural Representations},
shortname = {NeurReps},
editor = {Sanborn, Sophia and Shewmake, Christian and Azeglio, Simone and Miolane, Nina},
volume = {228},
year = {2023},
start = {2023-12-16},
end = {2023-12-16},
published = {2024-08-02},
conference_url = {https://neurreps.org},
address = {New Orleans, Louisiana, USA}
}
@InProceedings{sanborn2023,
title = {Preface},
author = {Sanborn, Sophia and Shewmake, Christian and Azeglio, Simone and Miolane, Nina},
pages = {i-vii},
}
@InProceedings{he2023,
title = {Sheaf-based Positional Encodings for Graph Neural Networks},
author = {He, Yu and Bodnar, Cristian and Li\`{o}, Pietro},
pages = {1-18},
abstract = {Graph Neural Networks (GNNs) work directly with graph-structured data, capitalising on relational information among entities. One limitation of GNNs is their reliance on local interactions among connected nodes. GNNs may generate identical node embeddings for similar local neighbourhoods and fail to distinguish structurally distinct graphs. Positional encodings help to break the locality constraint by informing the nodes of their global positions in the graph. Furthermore, they are required by Graph Transformers to encode structural information. However, existing positional encodings based on the graph Laplacian only encode structural information and are typically fixed. To address these limitations, we propose a novel approach to design positional encodings using sheaf theory. The sheaf Laplacian can be learnt from node data, allowing it to encode both the structure and semantic information. We present two methodologies for creating sheaf-based positional encodings, showcasing their efficacy in node and graph tasks. Our work advances the integration of sheaves in graph learning, paving the way for innovative GNN techniques that draw inspiration from geometry and topology.}
}
@InProceedings{lu2023,
title = {AMES: A differentiable embedding space selection framework for latent graph inference},
author = {Lu, Yuan and S\'{a}ez de Oc\'{a}riz Borde, Haitz and Li\`{o}, Pietro},
pages = {19-34},
abstract = {In real-world scenarios, although data entities may possess inherent relationships, the specific graph illustrating their connections might not be directly accessible. Latent graph inference addresses this issue by enabling Graph Neural Networks (GNNs) to operate on point cloud data, dynamically learning the necessary graph structure. These graphs are often derived from a latent embedding space, which can be modeled using Euclidean, hyperbolic, spherical, or product spaces. However, currently, there is no principled differentiable method for determining the optimal embedding space. In this work, we introduce the Attentional Multi-Embedding Selection (AMES) framework, a differentiable method for selecting the best embedding space for latent graph inference through backpropagation, considering a downstream task. Our framework consistently achieves comparable or superior results compared to previous methods for latent graph inference across five benchmark datasets. Importantly, our approach eliminates the need for conducting multiple experiments to identify the optimal embedding space. Furthermore, we explore interpretability techniques that track the gradient contributions of different latent graphs, shedding light on how our attention-based, fully differentiable approach learns to choose the appropriate latent space. In line with previous works, our experiments emphasize the advantages of hyperbolic spaces in enhancing performance. More importantly, our interpretability framework provides a general approach for quantitatively comparing embedding spaces across different tasks based on their contributions, a dimension that has been overlooked in previous literature on latent graph inference.}
}
@InProceedings{nguyen2023,
title = {Fast Temporal Wavelet Graph Neural Networks},
author = {Nguyen, Duc Thien and Nguyen, Manh Duc Tuan and Hy, Truong Son and Kondor, Risi},
pages = {35-54},
abstract = {Spatio-temporal signals forecasting plays an important role in numerous domains, especially in neuroscience and transportation. The task is challenging due to the highly intricate spatial structure, as well as the non-linear temporal dynamics of the network. To facilitate reliable and timely forecast for the human brain and traffic networks, we propose the Fast Temporal Wavelet Graph Neural Networks (FTWGNN) that is both time- and memory-efficient for learning tasks on timeseries data with the underlying graph structure, thanks to the theories of Multiresolution analysis and Wavelet theory on discrete spaces. We employ Multiresolution Matrix Factorization (MMF) (Kondor et al., 2014) to factorize the highly dense graph structure and compute the corresponding sparse wavelet basis that allows us to construct fast wavelet convolution as the backbone of our novel architecture. Experimental results on real-world PEMS-BAY, METR-LA traffic datasets and AJILE12 ECoG dataset show that FTWGNN is competitive with the state-of-the-arts while maintaining a low computational footprint. Our PyTorch implementation is publicly available at https://github.com/HySonLab/TWGNN}
}
@InProceedings{tipton2023,
title = {Haldane bundles: a dataset for learning to predict the Chern number of line bundles on the torus},
author = {Tipton, Cody and Coda, Elizabeth and Brown, Davis and Bittner, Alyson and Lee, Jung and Jorgenson, Grayson and Emerson, Tegan and Kvinge, Henry},
pages = {55-74},
abstract = {Characteristic classes, which are abstract topological invariants associated with vector bundles, have become an important notion in modern physics with surprising real-world consequences. As a representative example, the incredible properties of topological insulators, which are insulators in their bulk but conductors on their surface, can be completely characterized by a specific characteristic class associated with their electronic band structure, the first Chern class. Given their importance to next generation computing and the computational challenge of calculating them using first-principles approaches, there is a need to develop machine learning approaches to predict the characteristic classes associated with a material system. To aid in this program we introduce the Haldane bundle dataset, which consists of synthetically generated complex line bundles on the $2$-torus. We envision this dataset, which is not as challenging as noisy and sparsely measured real-world datasets but (as we show) still difficult for off-the-shelf architectures, to be a testing ground for architectures that incorporate the rich topological and geometric priors underlying characteristic classes.}
}
@InProceedings{kvinge2023,
title = {Internal representations of vision models through the lens of frames on data manifolds},
author = {Kvinge, Henry and Jorgenson, Grayson and Brown, Davis and Godfrey, Charles and Emerson, Tegan},
pages = {75-115},
abstract = {While the last five years have seen considerable progress in understanding the internal representations of deep learning models, many questions remain. This is especially true when trying to understand the impact of model design choices, such as model architecture or training algorithm, on hidden representation geometry and dynamics. In this work we present a new approach to studying such representations inspired by the idea of a frame on the tangent bundle of a manifold. Our construction, which we call a neural frame, is formed by assembling a set of vectors representing specific types of perturbations of a data point, for example infinitesimal augmentations, noise perturbations, or perturbations produced by a generative model, and studying how these change as they pass through a network. Using neural frames, we make observations about the way that models process, layer-by-layer, specific modes of variation within a small neighborhood of a datapoint. Our results provide new perspectives on a number of phenomena, such as the manner in which training with augmentation produces model invariance or the proposed trade-off between adversarial training and model generalization.}
}
@InProceedings{khajehnejad2023,
title = {On complex network dynamics of an in vitro neuronal system during Rest and Gameplay},
author = {Khajehnejad, Moein and Habibollahi, Forough and Loeffler, Alon and Kagan, Brett J. and Razi, Adeel},
pages = {116-128},
abstract = {In this study, we focus on characterizing the complex network dynamics of an <em>in vitro</em> neuronal system of live biological cells during two distinct activity states: a spontaneous rest state and engagement in a real-time (closed-loop) game environment. We use <em>DishBrain</em>, which is a system that embodies <em>in vitro</em> neural networks with <em>in silico</em> computation using a high-density multi-electrode array.
First, we embed the spiking activity of these channels in a lower-dimensional space using various representation learning methods. We then extract a subset of representative channels that are consistent across all of the neuronal preparations. Next, by analyzing these low-dimensional representations, we explore the patterns of macroscopic neuronal network dynamics during the learning process. Remarkably, our findings indicate that just using the low-dimensional embedding of representative channels is sufficient to differentiate the neuronal culture during the Rest and Gameplay conditions.
Furthermore, we characterise the evolving neuronal <em>connectivity</em> patterns within the <em>DishBrain</em> system over time during Gameplay in comparison to the Rest condition. Notably, our investigation shows dynamic changes in the overall connectivity within the same region and across multiple regions on the multi-electrode array only during Gameplay. These findings underscore the plasticity of these neuronal networks in response to external stimuli and highlight the potential for modulating connectivity in a controlled environment.
The ability to distinguish between neuronal states using reduced-dimensional representations points to the presence of underlying patterns that could be pivotal for real-time monitoring and manipulation of neuronal cultures. Additionally, this provides insight into how biological based information processing systems rapidly adapt and learn and may lead to new or improved algorithms.}
}
@InProceedings{sonoda2023,
title = {Joint Group Invariant Functions on Data-Parameter Domain Induce Universal Neural Networks},
author = {Sonoda, Sho and Ishi, Hideyuki and Ishikawa, Isao and Ikeda, Masahiro},
pages = {129-144},
abstract = {The symmetry and geometry of input data are considered to be encoded in the internal data representation inside the neural network, but the specific encoding rule has been less investigated. In this study, we present a systematic method to induce a generalized neural network and its right inverse operator, called the \emph{ridgelet transform}, from a \emph{joint group invariant function} on the data-parameter domain. Since the ridgelet transform is an inverse, (1) it can describe the arrangement of parameters for the network to represent a target function, which is understood as the \emph{encoding rule}, and (2) it implies the \emph{universality} of the network. Based on the group representation theory, we present a new simple proof of the universality by using Schur's lemma in a unified manner covering a wide class of networks, for example, the original ridgelet transform, formal \emph{deep} networks, and the dual voice transform. Since traditional universality theorems were demonstrated based on functional analysis, this study sheds light on the group theoretic aspect of the approximation theory, connecting geometric deep learning to abstract harmonic analysis.}
}
@InProceedings{geng2023,
title = {Scalar Invariant Networks with Zero Bias},
author = {Geng, Chuqin and Xu, Xiaojie and Ye, Haolin and Si, Xujie},
pages = {145-163},
abstract = {Just like weights, bias terms are learnable parameters in many popular machine learning models, including neural networks. Biases are believed to enhance the representational power of neural networks, enabling them to tackle various tasks in computer vision. Nevertheless, we argue that biases can be disregarded for some image-related tasks such as image classification, by considering the intrinsic distribution of images in the input space and desired model properties from first principles. Our empirical results suggest that zero-bias neural networks can perform comparably to normal networks for practical image classification tasks. Furthermore, we demonstrate that zero-bias neural networks possess a valuable property known as scalar (multiplicative) invariance. This implies that the network's predictions remain unchanged even when the contrast of the input image is altered. We further extend the scalar invariance property to more general cases, thereby attaining robustness within specific convex regions of the input space. We believe dropping bias terms can be considered as a geometric prior when designing neural network architecture for image classification, which shares the spirit of adapting convolutions as the translational invariance prior.}
}
@InProceedings{ballester2023,
title = {Decorrelating neurons using persistence},
author = {Ballester, Rub\'en and Casacuberta, Carles and Escalera, Sergio},
pages = {164-182},
abstract = {We propose a novel way to regularise deep learning models by reducing high correlations between neurons. For this, we present two regularisation terms computed from the weights of a minimum spanning tree of the clique whose vertices are the neurons of a given network (or a sample of those), where weights on edges are correlation dissimilarities. We explore their efficacy by performing a set of proof-of-concept experiments, for which our new regularisation terms outperform some popular ones. We demonstrate that, in these experiments, naive minimisation of all correlations between neurons obtains lower accuracies than our regularisation terms. This suggests that redundancies play a significant role in artificial neural networks, as evidenced by some studies in neuroscience for real networks. We include a proof of differentiability of our regularisers, thus developing the first effective topological persistence-based regularisation terms that consider the whole set of neurons and that can be applied to a feedforward architecture in any deep learning task such as classification, data generation, or regression.}
}
@InProceedings{pegoraro2023,
title = {Spectral Maps for Learning on Subgraphs},
author = {Pegoraro, Marco and Marin, Riccardo and Rampini, Arianna and Melzi, Simone and Cosmo, Luca and Rodol\`{a}, Emanuele},
pages = {183-205},
abstract = {In graph learning, maps between graphs and their subgraphs frequently arise. For instance, when coarsening or rewiring operations are present along the pipeline, one needs to keep track of the corresponding nodes between the original and modified graphs. Classically, these maps are represented as binary node-to-node correspondence matrices, and used as-is to transfer node-wise features between the graphs. In this paper, we argue that simply changing this map representation can bring notable benefits to graph learning tasks. Drawing inspiration from recent progress in geometry processing, we introduce a spectral representation for maps that is easy to integrate into existing graph learning models. This spectral representation is a compact and straightforward plug-in replacement, and is robust to topological changes of the graphs. Remarkably, the representation exhibits structural properties that make it interpretable, drawing an analogy with recent results on smooth manifolds. We demonstrate the benefits of incorporating spectral maps in graph learning pipelines, addressing scenarios where a node-to-node map is not well defined, or in the absence of exact isomorphism. Our approach bears practical benefits in knowledge distillation and hierarchical learning, where we show comparable or improved performance at a fraction of the computational cost.}
}
@InProceedings{briola2023,
title = {Homological Convolutional Neural Networks},
author = {Briola, Antonio and Wang, Yuanrong and Bartolucci, Silvia and Aste, Tomaso},
pages = {206-231},
abstract = {Deep learning methods have demonstrated outstanding performances on classification and regression tasks on homogeneous data types (e.g., image, audio, and text data). However, tabular data still pose a challenge, with classic machine learning approaches being often computationally cheaper and equally effective than increasingly complex deep learning architectures. The challenge arises from the fact that, in tabular data, the correlation among features is weaker than the one from spatial or semantic relationships in images or natural language, and the dependency structures need to be modeled without any prior information. In this work, we propose a novel deep learning architecture that exploits the data structural organization through topologically constrained network representations to gain relational information from sparse tabular inputs. The resulting model leverages the power of convolution and is centered on a limited number of concepts from network topology to guarantee: (i) a data-centric and deterministic building pipeline; (ii) a high level of interpretability over the inference process; and (iii) an adequate room for scalability. We test our model on $18$ benchmark datasets against $5$ classic machine learning and $3$ deep learning models, demonstrating that our approach reaches state-of-the-art performances on these challenging datasets. The code to reproduce all our experiments is provided at https://github.com/FinancialComputingUCL/HomologicalCNN.}
}
@InProceedings{han2023,
title = {Curvature Fields from Shading Fields},
author = {Han, Xinran and Zickler, Todd},
pages = {232-254},
abstract = {We re-examine the estimation of 3D shape from images that are caused by shading of diffuse Lambertian surfaces. We propose a neural model that is motivated by the well-documented perceptual effect in which shape is perceived from shading without a precise perception of lighting. Our model operates independently in each receptive field and produces a scalar statistic of surface curvature for that field. The model’s architecture builds on previous mathematical analyses of lighting-invariant shape constraints, and it leverages geometric structure to provide equivariance under image rotations and translations. Applying our model in parallel across a dense set of receptive fields produces a curvature field that we show is quite stable under changes to a surface’s albedo pattern (texture) and also to changes in lighting, even when lighting varies spatially across the surface.}
}
@InProceedings{versteeg2023,
title = {Expressive dynamics models with nonlinear injective readouts enable reliable recovery of latent features from neural activity},
author = {Versteeg, Christopher and Sedler, Andrew R. and McCart, Jonathan D. and Pandarinath, Chethan},
pages = {255-278},
abstract = {An emerging framework in neuroscience uses the rules that govern how a neural circuit's state evolves over time to understand the circuit's underlying computation. While these <em>neural dynamics</em> cannot be directly measured, new techniques attempt to estimate them by modeling observed neural recordings as a low-dimensional latent dynamical system embedded into a higher-dimensional neural space. How these models represent the readout from latent space to neural space can affect the interpretability of the latent representation -- for example, a linear readout could make simple, low-dimensional dynamics unfolding on a non-linear neural manifold appear excessively complex and high-dimensional. Additionally, standard readouts (both linear and non-linear) often lack injectivity, meaning that they don't obligate changes in latent state to directly affect activity in the neural space. During training, non-injective readouts incentivize the model to invent dynamics that misrepresent the underlying system and computation. To address the challenges presented by non-linearity and non-injectivity, we combined a custom readout with a previously developed low-dimensional latent dynamics model to create the Ordinary Differential equations autoencoder with Injective Nonlinear readout (ODIN). We generated a synthetic spiking dataset by non-linearly embedding activity from a low-dimensional dynamical system into higher-D neural activity. We show that, in contrast to alternative models, ODIN is able to recover ground-truth latent activity from these data even when the nature of the system and embedding are unknown. Additionally, we show that ODIN enables the unsupervised recovery of underlying dynamical features (e.g., fixed points) and embedding geometry (e.g., the neural manifold) over alternative models. Overall, ODIN's ability to recover ground-truth latent features with low dimensionality makes it a promising method for distilling interpretable dynamics that can explain neural computation.}
}
@InProceedings{suryaka2023,
title = {Pitfalls in Measuring Neural Transferability},
author = {Suresh, Suryaka and Abrol, Vinayak and Thakur, Anshul},
pages = {279-291},
abstract = {Transferability scores quantify the aptness of the pre-trained models for a downstream task and help in selecting an optimal pre-trained model for transfer learning. This work aims to draw attention to the significant shortcomings of state-of-the-art transferability scores. To this aim, we propose \emph{neural collapse-based transferability scores} that analyse intra-class \emph{variability collapse} and inter-class discriminative ability of the penultimate embedding space of a pre-trained model. The experimentation across the image and audio domains demonstrates that such a simple variability analysis of the feature space is sufficient to satisfy the current definition of transferability scores, and there is a requirement for a new generic definition of transferability. Further, building on these results, we highlight new research directions and postulate characteristics of an ideal transferability measure that will be helpful in streamlining future studies targeting this problem.}
}
@InProceedings{mansfield2023,
title = {Random field augmentations for self-supervised representation learning},
author = {Mansfield, Philip Andrew and Afkanpour, Arash and Morningstar, Warren Richard and Singhal, Karan},
pages = {292-302},
abstract = {Self-supervised representation learning is heavily dependent on data augmentations to specify the invariances encoded in representations. Previous work has shown that applying diverse data augmentations is crucial to downstream performance, but augmentation techniques remain under-explored. In this work, we propose a new family of local transformations based on Gaussian random fields to generate image augmentations for self-supervised representation learning. These transformations generalize the well-established affine and color transformations (translation, rotation, color jitter, etc.) and greatly increase the space of augmentations by allowing transformation parameter values to vary from pixel to pixel. The parameters are treated as continuous functions of spatial coordinates, and modeled as independent Gaussian random fields. Empirical results show the effectiveness of the new transformations for self-supervised representation learning. Specifically, we achieve a 1.7\% top-1 accuracy improvement over baseline on ImageNet downstream classification, and a 3.6\% improvement on out-of-distribution iNaturalist downstream classification. However, due to the flexibility of the new transformations, learned representations are sensitive to hyperparameters. While mild transformations improve representations, we observe that strong transformations can degrade the structure of an image, indicating that balancing the diversity and strength of augmentations is important for improving generalization of learned representations.}
}
@InProceedings{kohler2023,
title = {Symmetric models for radar response modeling},
author = {Kohler, Colin and Vaska, Nathan and Muthukrishnan, Ramya and Choi, Whangbong and Park, Jung Yeon and Goodwin, Justin and Caceres, Rajmonda and Walters, Robin},
pages = {303-323},
abstract = {Many radar applications require complex radar signature models that incorporate characteristics of an object's shape and dynamics as well as sensing effects. Even though high-fidelity, first-principles radar simulators are available, they tend to be resource-intensive and do not easily support the requirements of agile and large-scale AI development and evaluation frameworks. Deep learning represents an attractive alternative to these numerical methods, but can have large data requirements and limited generalization ability. In this work, we present the Radar Equivariant Model (REM), the first SO(3)-equivariant model for predicting radar responses from object meshes. By constraining our model to the symmetries inherent to radar sensing, REM is able to achieve a high level reconstruction of signals generated by a first-principles radar model and shows improved performance and sample efficiency over other encoder-decoder models.}
}
@InProceedings{walker2023,
title = {Explicit neural surfaces: learning continuous geometry with deformation fields},
author = {Walker, Thomas and Mariotti, Octave and Vaxman, Amir and Bilen, Hakan},
pages = {324-345},
abstract = {We introduce Explicit Neural Surfaces (ENS), an efficient smooth surface representation that directly encodes topology with a deformation field from a known base domain. We apply this representation to reconstruct explicit surfaces from multiple views, where we use a series of neural deformation fields to progressively transform the base domain into a target shape. By using meshes as discrete surface proxies, we train the deformation fields through efficient differentiable rasterization. Using a fixed base domain allows us to have Laplace-Beltrami eigenfunctions as an intrinsic positional encoding alongside standard extrinsic Fourier features, with which our approach can capture fine surface details. Compared to implicit surfaces, ENS trains faster and has several orders of magnitude faster inference times. The explicit nature of our approach also allows higher-quality mesh extraction whilst maintaining competitive surface reconstruction performance and real-time capabilities.}
}
@InProceedings{li2023,
title = {Structural Similarities Between Language Models and Neural Response Measurements},
author = {Li, Jiaang and Karamolegkou, Antonia and Kementchedjhieva, Yova and Abdou, Mostafa and S{\o}gaard, Anders},
pages = {346-365},
abstract = {Large language models have complicated internal dynamics, but induce representations of words and phrases whose geometry we can study. Human language processing is also opaque, but neural response measurements can provide (noisy) recordings of activations during listening or reading, from which we can extract similar representations of words and phrases. Here we study the extent to which the geometries induced by these representations share similarities in the context of brain decoding. We find that the larger neural language models get, the more their representations are structurally similar to neural response measurements from brain imaging.}
}
@InProceedings{chetan2023,
title = {Distance Learner: Incorporating Manifold Prior to Model Training},
author = {Chetan, Aditya and Kwatra, Nipun},
pages = {366-387},
abstract = {The manifold hypothesis (real-world data concentrates near low-dimensional manifolds) is suggested as the principle behind the effectiveness of machine learning algorithms in very high-dimensional problems that are common in domains such as vision and speech. Multiple methods have been proposed to explicitly incorporate the manifold hypothesis as a prior in modern Deep Neural Networks (DNNs), with varying success. In this paper, we propose a new method, <em>Distance Learner</em>, to incorporate this prior for DNN-based classifiers. <em>Distance Learner</em> is trained to predict the <em>distance</em> of a point from the underlying manifold of each class, rather than the class label. For classification, <em>Distance Learner</em> then chooses the class corresponding to the closest predicted class manifold. <em>Distance Learner</em> can also identify points as being out of distribution (belonging to neither class), if the distance to the closest manifold is higher than a threshold. We evaluate our method on multiple synthetic datasets and show that <em>Distance Learner</em> learns much more meaningful classification boundaries compared to a standard classifier. We also evaluate our method on the task of adversarial robustness and find that it not only outperforms standard classifiers by a large margin but also performs at par with classifiers trained via well-accepted standard adversarial training.}
}
@InProceedings{crisostomi2023,
title = {From charts to atlas: Merging latent spaces into one},
author = {Crisostomi, Donato and Cannistraci, Irene and Moschella, Luca and Barbiero, Pietro and Ciccone, Marco and Li\`{o}, Pietro and Rodol\`{a}, Emanuele},
pages = {388-404},
abstract = {Models trained on semantically related datasets and tasks exhibit comparable inter-sample relations within their latent spaces. We investigate in this study the aggregation of such latent spaces to create a unified space encompassing the combined information.
To this end, we introduce Relative Latent Space Aggregation, a two-step approach that first renders the spaces comparable using relative representations, and then aggregates them via a simple mean. We carefully divide a classification problem into a series of learning tasks under three different settings: sharing samples, classes, or neither. We then train a model on each task and aggregate the resulting latent spaces.
We compare the aggregated space with that derived from an end-to-end model trained over all tasks and show that the two spaces are similar. We then observe that the aggregated space is better suited for classification, and empirically demonstrate that it is due to the unique imprints left by task-specific embedders within the representations. We finally test our framework in scenarios where no shared region exists and show that it can still be used to merge the spaces, albeit with diminished benefits over naive merging.}
}
@InProceedings{mochizuki-freeman2023,
title = {Geometry of abstract learned knowledge in deep RL agents},
author = {Mochizuki-Freeman, James and Kabir, Md Rysul and Gulecha, Mitesh and Tiganj, Zoran},
pages = {405-424},
abstract = {Data from neural recordings suggest that mammalian brains represent physical and abstract task-relevant variables through low-dimensional neural manifolds. In a recent electrophysiological study (Nieh et al., 2021), mice performed an evidence accumulation task while moving along a virtual track. Nonlinear dimensionality reduction of the population activity revealed that task-relevant variables were jointly mapped in an orderly manner in the low-dimensional space. Here we trained deep reinforcement learning (RL) agents on the same evidence accumulation task and found that their neural activity can be described with a low-dimensional manifold spanned by task-relevant variables. These results provide further insight into similarities and differences between neural dynamics in mammals and deep RL agents. Furthermore, we showed that manifold learning can be used to characterize the representational space of the RL agents with the potential to improve the interpretability of decision-making in RL.}
}
@InProceedings{vastola2023,
title = {Optimal packing of attractor states in neural representations},
author = {Vastola, John},
pages = {425-442},
abstract = {Animals' internal states reflect variables like their position in space, orientation, decisions, and motor actions---but how should these internal states be arranged? Internal states which frequently transition between one another should be close enough that transitions can happen quickly, but not so close that neural noise significantly impacts the stability of those states, and how reliably they can be encoded and decoded. In this paper, we study the problem of striking a balance between these two concerns, which we call an `optimal packing' problem since it resembles mathematical problems like sphere packing. While this problem is generally extremely difficult, we show that symmetries in environmental transition statistics imply certain symmetries of the optimal neural representations, which allows us in some cases to exactly solve for the optimal state arrangement. We focus on two toy cases: uniform transition statistics, and cyclic transition statistics. Code is available at \url{https://github.com/john-vastola/optimal-packing-neurreps23}.}
}
@InProceedings{doenmez2023,
title = {Discovering latent causes and memory modification: A computational approach using symmetry and geometry},
author = {D\"onmez, Arif},
pages = {443-458},
abstract = {We learn from our experiences, even though they are never exactly the same. This implies that we need to assess their similarity to apply what we have learned from one experience to another. It is proposed that we “cluster” our experiences based on (hidden) <em>latent causes</em> that we infer. It is also suggested that surprises, which occur when our predictions are incorrect, help us categorize our experiences into distinct groups. In this paper, we develop a computational theory that emulates these processes based on two basic concepts from intuitive physics and Gestalt psychology using symmetry and geometry. We apply our approach to simple tasks that involve inductive reasoning. Remarkably, the output of our computational approach aligns closely with human responses.}
}