diff --git a/config.toml b/config.toml index eeb875b..eb2af0d 100644 --- a/config.toml +++ b/config.toml @@ -193,6 +193,12 @@ theme = 'mainroad' url = "/program/overview" parent = "program" weight = 10 + [[menu.main]] + identifier = "schedule" + name = "Schedule" + url = "/program/schedule" + parent = "program" + weight = 13 [[menu.main]] identifier = "accepted-papers" name = "Accepted Papers" diff --git a/content/program/schedule.md b/content/program/schedule.md new file mode 100644 index 0000000..ea82998 --- /dev/null +++ b/content/program/schedule.md @@ -0,0 +1,7 @@ +--- +title: "Schedule" +date: 2024-05-08T13:27:57+02:00 +draft: false +--- + +{{< detailed_schedule_json "/data/schedule/sessions.json" "/data/schedule/papers.json" >}} diff --git a/data/schedule/papers.json b/data/schedule/papers.json new file mode 100644 index 0000000..85b8151 --- /dev/null +++ b/data/schedule/papers.json @@ -0,0 +1,1303 @@ +{ + "Theory": { + "papers": [ + { + "number": 372, + "title": "Formal Representations of Classical Planning Domains", + "abstract": "Planning domains are an important notion, e.g. when it comes to restricting the input for generalized planning or learning approaches. However, domains as specified in PDDL cannot fully capture the intuitive understanding of a planning domain. We close this semantic gap and propose using PDDL axioms to characterize the (typically infinite) set of legal tasks of a domain. A minor extension makes it possible to express all properties that can be determined in polynomial time. 
We demonstrate the suitability of the approach on established domains from the International Planning Competition.", + "authors": "Claudia Grundke, Gabriele R\u00f6ger, Malte Helmert", + "url": "https://openreview.net/forum?id=c7WorvcDrF", + "pdf": "https://openreview.net//pdf/726ff5cd170eb1bbe0e8cb2aaff3e5a0fdf4bf4b.pdf", + "primary_keywords": [ + "Theory" + ], + "long": true, + "tldr": "We show that axioms are suitable for characterizing the legal tasks of a planning domain.", + "type": "long" + }, + { + "number": 255, + "title": "An Analysis of the Decidability and Complexity of Numeric Additive Planning", + "abstract": "In this paper, we first define numeric additive planning ($\\mathrm{NAP}$), a planning formulation equivalent to Hoffmann's Restricted Tasks over Integers. Then, we analyze the minimal number of action repetitions required for a solution, since planning turns out to be decidable as long as such numbers can be calculated for all actions. We differentiate between two kinds of repetitions and solve for one by integer linear programming and the other by search. Additionally, we characterize the differences between propositional planning and $\\mathrm{NAP}$ regarding these two kinds. To achieve this, we define so-called multi-valued partial order plans, a novel compact plan representation. 
Finally, we consider decidable fragments of $\\mathrm{NAP}$ and their complexity.", + "authors": "Hayyan Helal, Gerhard Lakemeyer", + "url": "https://openreview.net/forum?id=xGu1sMXiEO", + "pdf": "https://openreview.net//pdf/651232a69bcca6c10ccfe2eb7f61a90c5ed35bb5.pdf", + "primary_keywords": [ + "Theory" + ], + "long": true, + "tldr": "By analyzing the minimal required number of action repetition, decidable fragments of numeric planning can be defined.", + "type": "long" + }, + { + "number": 82, + "title": "Higher-Dimensional Potential Heuristics: Lower Bound Criterion and Connection to Correlation Complexity", + "abstract": "The correlation complexity is a measure of planning tasks indicating how hard they are. This measure is based on the question \"What dimension is needed to express a heuristic as a potential heuristic?\". In the introducing work, the authors provided sufficient criteria to detect a correlation complexity of at least 2 on a planning task. They also introduced an example of a planning task with correlation complexity 3. In this work, we introduce a criterion to detect a lower bound for the aforementioned question. 
This can also be used to detect an arbitrary correlation complexity and extend the mentioned example to show with the new criterion that planning tasks with arbitrary correlation complexity exist.", + "authors": "Simon Dold, Malte Helmert", + "url": "https://openreview.net/forum?id=6kRM5VKrpQ", + "pdf": "https://openreview.net//pdf/25159f2eb0455ba996441854aa7c8097656363a3.pdf", + "primary_keywords": [ + "Theory" + ], + "long": true, + "tldr": "We introduce a criterion to give a lower bound to the answer of \"What dimension is needed to express a heuristic as a potential heuristics?\".", + "type": "long" + }, + { + "number": 139, + "title": "Map Connectivity and Empirical Hardness of Grid-based Multi-Agent Pathfinding Problem", + "abstract": "We present an empirical study of the relationship between map connectivity and the empirical hardness of the multi-agent pathfinding~(MAPF) problem. By analyzing the second smallest eigenvalue~(commonly known as $\\lambda_2$) of the normalized Laplacian matrix of different maps, our initial study indicates that maps with smaller $\\lambda_2$ tend to create more challenging instances when agents are generated uniformly randomly. Additionally, we introduce a map generator based on Quality Diversity~(QD) that is capable of producing maps with specified $\\lambda_2$ ranges, offering a possible way for generating challenging MAPF instances. Despite the absence of a strict monotonic correlation with $\\lambda_2$ and the empirical hardness of MAPF, this study serves as a valuable initial investigation for gaining a deeper understanding of what makes a MAPF instance hard to solve.", + "authors": "Jingyao Ren, Eric Ewing, T. K. 
Satish Kumar, Sven Koenig, Nora Ayanian", + "url": "https://openreview.net/forum?id=Di1PO538HT", + "pdf": "https://openreview.net//pdf/c91944bca61664ace8e79c544ff861e30239badb.pdf", + "primary_keywords": [ + "Multi-Agent Planning" + ], + "long": false, + "tldr": "An empirical study on the relationship between map connectivity and the empirical hardness of multi-agent pathfinding (MAPF) problem", + "type": "short" + }, + { + "number": 143, + "title": "On the Computational Complexity of Stackelberg Planning and Meta-Operator Verification", + "abstract": "Stackelberg planning is a two-player variant of classical planning,\nin which one player tries to ``sabotage'' the other player in achieving its\ngoal. This yields a bi-objective planning problem, which appears to be\ncomputationally more challenging than the single-player case. But is this\nactually true? All investigations so far focused on practical aspects, i.e.,\nalgorithms, and applications like cyber-security or very recently\nfor meta-operator verification in classical planning. \nWe close this gap by conducting the first theoretical complexity analysis\nof Stackelberg planning. We show that in general Stackelberg planning is no\nharder than classical planning. Under a polynomial plan-length restriction,\nhowever, Stackelberg planning is a level higher up in the polynomial complexity\nhierarchy, suggesting that compilations into classical planning come with an\nexponential plan-length increase. 
In attempts to identify tractable fragments\nexploitable, e.g., for Stackelberg planning heuristic design, we further study\nits complexity under various planning task restrictions, showing that\nStackelberg planning remains intractable where classical planning is not.\nWe finally inspect the complexity of the meta-operator verification, which in\nparticular gives rise to a new interpretation as the dual problem of Stackelberg\nplan existence.", + "authors": "Gregor Behnke, Marcel Steinmetz", + "url": "https://openreview.net/forum?id=29DeaP5Td5", + "pdf": "https://openreview.net//pdf/d075e151d355659564ec74bd56fb3fc534d93717.pdf", + "primary_keywords": [ + "Theory" + ], + "long": false, + "tldr": "", + "type": "short" + }, + { + "number": 98, + "title": "Unifying and Certifying Top-Quality Planning", + "abstract": "The growing utilization of planning tools in practical scenarios has sparked an interest in generating multiple high-quality plans. Consequently, a range of computational problems under the general umbrella of top-quality planning were introduced over a short time period, each with its own definition. \nIn this work, we show that the existing definitions can be unified into one, based on a dominance relation. The different computational problems, therefore, simply correspond to different dominance relations. \nGiven the unified definition, we can now certify the top-quality of the solutions, leveraging existing certification of unsolvability and optimality. 
We show that task transformations found in the existing literature can be employed for the efficient certification of various top-quality planning problems and propose a novel transformation to efficiently certify loopless top-quality planning.", + "authors": "Michael Katz, Junkyu Lee, Shirin Sohrabi", + "url": "https://openreview.net/forum?id=vYhmlz9owN", + "pdf": "https://openreview.net//pdf/21f25f67b16e21497b6a03fc1efbb343861cd6de.pdf", + "primary_keywords": [ + "Theory" + ], + "long": false, + "tldr": "We unify the existing definitions of various top-quality planning problems and certify their solutions.", + "type": "short" + }, + { + "number": 198, + "title": "Termination Properties of Transition Rules for Indirect Effects", + "abstract": "Indirect effects of agent's actions have traditionally been formalized as condition-effect rules that always fire whenever applicable, after each action taken by the agent.\nIn this work, we investigate a core problem of indirect effects, the possibility of arbitrarily or infinitely long sequences of rule firings. Specifically we investigate the termination of rule firings, as well as their confluence, that is, the uniqueness of the state that is ultimately reached. Both problems turn out to be PSPACE-complete. 
After this, we devise practically interesting syntactic and structural restrictions that guarantee polynomial-time termination and confluence tests.\nFinally, in the context of planning languages that support indirect effects, we propose new implementation technologies.", + "authors": "Mojtaba Elahi, Saurabh Fadnis, Jussi Rintanen", + "url": "https://openreview.net/forum?id=uEKuLkI5B0", + "pdf": "https://openreview.net//pdf/d24e9bcb5ba214294bb2addac72932441b981314.pdf", + "primary_keywords": "", + "long": true, + "tldr": "Theoretical analysis of indirect effects as used in Planning, KR, and elsewhere", + "type": "long" + } + ] + }, + "Scheduling": { + "papers": [ + { + "number": 193, + "title": "Incremental Ordering for Scheduling Problems", + "abstract": "Given an instance of a scheduling problem where we want to start executing jobs as soon as possible, it is advantageous if a scheduling algorithm emits the first parts of its solution early, in particular before the algorithm completes its work.\nTherefore, in this position paper, we analyze core scheduling problems in regards to their enumeration complexity, i.e. 
the computation time to the first emitted schedule entry (preprocessing time) and the worst case time between two consecutive parts of the solution (delay).\n\nSpecifically, we look at scheduling instances that reduce to ordering problems.\nWe apply a known incremental sorting algorithm for scheduling strategies that are at their core comparison-based sorting algorithms and translate corresponding upper and lower complexity bounds to the scheduling setting.\nFor instances with $n$ jobs and a precedence DAG with maximum degree $\\Delta$, we incrementally build a topological ordering with $O(n)$ preprocessing and $O(\\Delta)$ delay.\nWe prove a matching lower bound and show with an adversary argument that the delay lower bound holds even in case the DAG has constant average degree and the ordering is emitted out-of-order in the form of insert operations.\n\nWe complement our theoretical results with experiments that highlight the improved time-to-first-output and discuss research opportunities for similar incremental approaches for other scheduling problems.", + "authors": "Stefan Neubert, Katrin Casel", + "url": "https://openreview.net/forum?id=yXzvVxkYqn", + "pdf": "https://openreview.net//pdf/a2fd41bf4678e712711449becb58d5e566d0c1b0.pdf", + "primary_keywords": [ + "Theory" + ], + "long": true, + "tldr": "We study the concept of computing first solution parts to core scheduling problems.", + "type": "long" + }, + { + "number": 131, + "title": "Investigating Large Neighbourhood Search for Bus Driver Scheduling", + "abstract": "The Bus Driver Scheduling Problem (BDSP) is a combinatorial optimisation problem with high practical relevance. The aim is to assign bus drivers to predetermined routes while minimising a specified objective function that considers operating costs as well as employee satisfaction. Since we must satisfy several rules from a collective agreement and European regulations, the BDSP is highly constrained. 
Hence, using exact methods to solve large real-life-based instances is computationally too expensive, while heuristic methods still have a considerable gap to the optimum.\nOur paper presents a Large Neighbourhood Search (LNS) approach to solve the BDSP. We propose several novel destroy operators and an approach using column generation to repair the sub-problem.\nWe analyse the impact of the destroy and repair operators and investigate various possibilities to select them, including adaptivity.\nThe proposed approach improves all the upper bounds for larger instances that exact methods cannot solve, as well as for some mid-sized instances, and outperforms existing heuristic approaches for this problem on all benchmark instances.", + "authors": "Tommaso Mannelli Mazzoli, Lucas Kletzander, Pascal Van Hentenryck, Nysret Musliu", + "url": "https://openreview.net/forum?id=d4TzG4ivNu", + "pdf": "https://openreview.net//pdf/1f5f597f05160b4ad85381339e97d0b3aac8d3c1.pdf", + "primary_keywords": [ + "Applications" + ], + "long": true, + "tldr": "We propose a new Large Neighbourhood Search for Bus Driver Scheduling based on novel destroy operators and Column Generation, which outperforms all previous methods on larger instances", + "type": "long" + }, + { + "number": 249, + "title": "Preference Explanation and Decision Support for Multi-Objective Real-World Test Laboratory Scheduling", + "abstract": "Complex real-world scheduling problems often include multiple conflicting objectives.\nDecision makers (DMs) can express their preferences over those objectives in different ways, including as sets of weights which are used in a linear combination of objective values.\nHowever, finding good sets of weights that result in solutions with desirable qualities is challenging and currently involves a lot of trial and error.\nWe propose a general method to explain objectives' values under a given set of weights using Shapley regression values.\nWe demonstrate this approach on the Test 
Laboratory Scheduling Problem (TLSP), for which we propose a multi-objective solution algorithm and show that suggestions for weight adjustments based on the introduced explanations are successful in guiding decision makers towards solutions that match their expectations.\nThis method is included in the TLSP MO-Explorer, a new decision support system that enables the exploration and analysis of high-dimensional Pareto fronts.", + "authors": "Florian Mischek, Nysret Musliu", + "url": "https://openreview.net/forum?id=1u95DvUJjE", + "pdf": "https://openreview.net//pdf/1d036edfd204a2bed8d9faff56203ca6ae935649.pdf", + "primary_keywords": [ + "Applications", + "Human-aware Planning and Scheduling" + ], + "long": true, + "tldr": "We propose a method to explain the output of multi-objective optimization algorithms based on Shapley regression values and apply it in practice to the Test Laboratory Scheduling Problem", + "type": "long" + }, + { + "number": 328, + "title": "A Real-Time Rescheduling Algorithm for Multi-robot Plan Execution", + "abstract": "One area of research in Multi-Agent Path Finding (MAPF) is to determine how re-planning can be efficiently achieved in the case of agents being delayed during execution. One option is to determine a new wait order, i.e., an ordering for multiple agents that are planned to visit the same location, to find the most optimal new solution that can be produced by re-ordering the wait order. We propose to use a Switchable Temporal Plan Graph and a heuristic search algorithm to approach finding a new optimal wait order. We prove the admissibility of our algorithm and experiment with its efficiency in a variety of conditions by measuring re-planning speed in different maps, with varying numbers of agents and randomized scenarios for agents' start and goal locations. 
Our algorithm shows a fast runtime in all experimental setups.", + "authors": "Ying Feng, Adittyo Paul, Zhe Chen, Jiaoyang Li", + "url": "https://openreview.net/forum?id=qgtJSfNtWJ", + "pdf": "https://openreview.net//pdf/72b6bd00b2d8a284c491f405c9e515782421595d.pdf", + "primary_keywords": [ + "Multi-Agent Planning" + ], + "long": true, + "tldr": "We propose a new replanning algorithm that finds the optimal wait ordering of agents and shows fast runtime.", + "type": "long" + } + ] + }, + "MAPF": { + "papers": [ + { + "number": 185, + "title": "Efficient Approximate Search for Multi-Objective Multi-Agent Path Finding", + "abstract": "The Multi-Objective Multi-Agent Path Finding (MO-MAPF) problem is the problem of computing collision-free paths for a team of agents while considering multiple cost metrics. Most existing MO-MAPF algorithms aim to compute the Pareto frontier of the solutions. However, a Pareto frontier can be time-consuming to compute and contain solutions with similar costs. Our first main contribution is BB-MO-CBS-pex, an approximate MO-MAPF algorithm that computes an approximate frontier for the user-specific approximation factor. BB-MO-CBS-pex builds upon BB-MO-CBS, a state-of-the-art MO-MAPF algorithm, and leverages A*pex, a state-of-the-art single-agent multi-objective search algorithm, to speed up different parts of BB-MO-CBS. We also provide two speed-up techniques for BB-MO-CBS-pex. Our second main contribution is BB-MO-CBS-k, which builds upon BB-MO-CBS-pex and computes up to k solutions for a user-provided k-value. BB-MO-CBS-k is useful when it is unclear how to determine an appropriate approximation factor. Our experimental results show that both BB-MO-CBS-pex and BB-MO-CBS-k solved significantly more instances than BB-MO-CBS for different approximation factors and k-values, respectively. 
Additionally, we compare BB-MO-CBS-pex with an approximate baseline algorithm derived from BB-MO-CBS and show that BB-MO-CBS-pex achieved speed-ups up to two orders of magnitude.", + "authors": "Fangji Wang, Han Zhang, Sven Koenig, Jiaoyang Li", + "url": "https://openreview.net/forum?id=FdzJ5TnVd7", + "pdf": "https://openreview.net//pdf/49c40bc4a4bde512afd47de779c13c08d7158793.pdf", + "primary_keywords": [ + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 37, + "title": "Improving Learnt Local MAPF Policies with Heuristic Search", + "abstract": "Multi-agent path finding (MAPF) is the problem of finding collision-free paths for a team of agents to reach their goal locations. \nState-of-the-art classical MAPF solvers typically employ heuristic search to find solutions for hundreds of agents but are typically centralized and can struggle to scale to larger numbers of agents.\nMachine learning (ML) approaches that learn policies for each agent are appealing as these could be decentralized systems and scale well while maintaining good solution quality. Current ML approaches to MAPF have proposed methods that have started to scratch the surface of this potential. However, state-of-the-art ML approaches produce \"local\" policies that only plan for a single timestep and have poor success rates and scalability. Our main idea is that we can improve a ML local policy by using heuristic search methods on the output probability distribution to resolve deadlocks and enable full horizon planning. We show several model-agnostic ways to use heuristic search with ML that significantly improves the local ML policy's success rate and scalability. \nTo our best knowledge, we demonstrate the first time ML-based MAPF approaches have scaled to similar high congestion (e.g. 
40% agent density) as state-of-the-art heuristic search methods.", + "authors": "Rishi Veerapaneni, Qian Wang, Kevin Ren, Arthur Jakobsson, Jiaoyang Li, Maxim Likhachev", + "url": "https://openreview.net/forum?id=6JEBeiztNT", + "pdf": "https://openreview.net//pdf/c103584493e70e1caa982bd69f58b19935024d74.pdf", + "primary_keywords": [ + "Learning", + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 36, + "title": "MAPF in 3D Warehouses: Dataset and Analysis", + "abstract": "Recent works have made significant progress in multi-agent path finding (MAPF), with modern methods being able to scale to hundreds of agents, handle unexpected delays, work in groups, etc. The vast majority of these methods have focused on 2D \"grid world\" domains. However, modern warehouses often utilize multi-agent robotic systems that can move in 3D, enabling dense storage but resulting in a more complex multi-agent planning problem. Motivated by this, we introduce and experimentally analyze the application of MAPF to 3D warehouse management, and release the first open-source 3D MAPF dataset. We benchmark two state-of-the-art MAPF methods, EECBS and MAPF-LNS2, and show how different hyper-parameters affect these methods across various 3D MAPF problems. We also investigate how the warehouse structure itself affects MAPF performance. 
Based on our experimental analysis, we find that a fast low-level search is critical for 3D MAPF, EECBS's suboptimality significantly changes the effect of certain CBS techniques, and certain warehouse designs can noticeably influence MAPF scalability and speed.", + "authors": "Qian Wang, Rishi Veerapaneni, Yu Wu, Jiaoyang Li, Maxim Likhachev", + "url": "https://openreview.net/forum?id=j0urLcZsGX", + "pdf": "https://openreview.net//pdf/92e6fbf53e51797385720db949b976dae021d0dd.pdf", + "primary_keywords": [ + "Applications", + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 168, + "title": "Planning and Execution in Multi-Agent Path Finding: Models and Algorithms", + "abstract": "In applications of Multi-Agent Path Finding (MAPF), it is often the sum of planning and execution times \nthat needs to be minimised (i.e., the Goal Achievement Time). Yet current methods seldom optimise for this objective. \nOptimal algorithms reduce execution time, but may require exponential planning time. Non-optimal algorithms reduce planning time, but at the expense of increased path length. To address these limitations we introduce PIE (Planning and Improving while Executing), a new framework for concurrent planning and execution in MAPF. We show how different instantiations of PIE affect practical performance, including initial planning time, action commitment time and concurrent vs. sequential planning and execution. We then adapt PIE to Lifelong MAPF, a popular application setting where agents are continuously assigned new goals and where additional decisions are required to ensure feasibility. We examine a variety of different approaches to overcome these challenges and we conduct comparative experiments vs. recently proposed alternatives. Results show that PIE substantially outperforms existing methods for One-shot and Lifelong MAPF.", + "authors": "Yue Zhang, Zhe Chen, Daniel Harabor, Pierre Le Bodic, Peter J. 
Stuckey", + "url": "https://openreview.net/forum?id=uzLS2zz3er", + "pdf": "https://openreview.net//pdf/650873d991217da628fc3a9bdbfee5bbb98c9b1e.pdf", + "primary_keywords": [ + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 64, + "title": "Robust Multi-Agent Pathfinding with Continuous Time", + "abstract": "Multi-Agent Pathfinding (MAPF) is the problem of finding plans for multiple agents such that every agent moves from its start location to its goal location without collisions.\nIf unexpected events delay some agents during plan execution, it may not be possible for the agents to continue following their plans without causing any collision.\nWe define and solve a $T$-robust MAPF problem that seeks plans that can be followed even if some delays occur, under the generalized MAPF$_R$ setting with continuous time notions. \nThe proposed approach is complete and provides provably optimal solutions.\nWe also develop an exact method for collision detection among agents that can be delayed.\nWe experimentally evaluate our proposed approach in terms of efficiency and plan cost.", + "authors": "Wen Jun Tan, Xueyan Tang, Wentong Cai", + "url": "https://openreview.net/forum?id=AIwAtZzM3v", + "pdf": "https://openreview.net//pdf/a40996ed4b82f5176981d79f54a85dcbc4cc238e.pdf", + "primary_keywords": [ + "Multi-Agent Planning" + ], + "long": true, + "tldr": "Robust Multi-Agent Pathfinding with Continuous Time using Exact Collision Detection.", + "type": "long" + }, + { + "number": 111, + "title": "SKATE : Successive Rank-based Task Assignment for Proactive Online Planning", + "abstract": "The development of online applications for services such as package delivery, crowdsourcing, or taxi dispatching has caught the attention of the research community to the domain of online multi-agent multi-task allocation. 
In online service applications, tasks (or requests) to be performed arrive over time and need to be dynamically assigned to agents. Such planning problems are challenging because: (i) few or almost no information about future tasks is available for long-term reasoning; (ii) agent number, as well as, task number can be impressively high; and (iii) an efficient solution has to be reached in a limited amount of time. In this paper, we propose SKATE, a successive rank-based task assignment algorithm for online multi-agent planning. SKATE can be seen as a meta-heuristic approach which successively assigns a task to the best-ranked agent until all tasks have been assigned. We assessed the complexity of SKATE and showed it is cubic in the number of agents and tasks. To investigate how multi-agent multi-task assignment algorithms perform under a high number of agents and tasks, we compare three multi-task assignment methods in synthetic and real data benchmark environments: Integer Linear Programming (ILP), Genetic Algorithm (GA), and SKATE. In addition, a proactive approach is nested to all methods to determine near-future available agents (resources) using a receding-horizon. Based on the results obtained, we can argue that the classical ILP offers the better quality solutions when treating a low number of agents and tasks, i.e. low load despite the receding-horizon size, while it struggles to respect the time constraint for high load. 
SKATE performs better than the other methods in high load conditions, and even better when a variable receding-horizon is used.", + "authors": "Deborah Conforto Nedelmann, J\u00e9r\u00f4me Lacan, Caroline Ponzoni Carvalho Chanel", + "url": "https://openreview.net/forum?id=TuSHy3fDOZ", + "pdf": "https://openreview.net//pdf/14916b4b5262a0136ae84aec393b0231f8542eee.pdf", + "primary_keywords": [ + "Multi-Agent Planning" + ], + "long": true, + "tldr": "We propose SKATE, a successive rank-based task assignment algorithm for online multi-agent planning inspired by an algorithm proposed in the field of multi-robot exploration.", + "type": "long" + } + ] + }, + "RL": { + "papers": [ + { + "number": 175, + "title": "Control in Stochastic Environment with Delays: A Model-based Reinforcement Learning Approach", + "abstract": "In this paper we are introducing a new reinforcement learning method for control problems in environments with delayed feedback. Specifically, our method employs stochastic planning, versus previous methods that used deterministic planning. This allows us to embed risk preference in the policy optimization problem. We show that this formulation can recover the optimal policy for problems with deterministic transitions. We contrast our policy with two prior methods from literature. We apply the methodology to simple tasks to understand its features. 
Then, we compare the performance of the methods in controlling multiple Atari games.", + "authors": "Zhiyuan Yao, Ionut Florescu, Chihoon Lee", + "url": "https://openreview.net/forum?id=9D1sAtvuLi", + "pdf": "https://openreview.net//pdf/be0a27cc834f8bcb697c9ebd2d47dc07771cf891.pdf", + "primary_keywords": [ + "Applications", + "Learning" + ], + "long": true, + "tldr": "A novel method to mitigate performance degradation issue in environments with delayed feedback.", + "type": "long" + }, + { + "number": 191, + "title": "Imitating Cost Constrained Behaviors in Reinforcement Learning", + "abstract": "Complex planning and scheduling problems have long been solved using various optimization or heuristic approaches. In recent year, imitation learning that aims to learn from expert demonstrations has been proposed as a viable alternative in solving these problems. Generally speaking, imitation learning is designed to learn either the reward (or preference) model or directly the behavioral policy by observing the behavior of an expert. Existing work in imitation learning and inverse reinforcement learning has focused on imitation primarily in unconstrained settings (e.g., no limit on fuel consumed by the vehicle). However, in many real-world domains, the behavior of an expert is governed not only by reward (or preference) but also by constraints. For instance, decisions on self-driving delivery vehicles are dependent not only on the route preferences/rewards (depending on past demand data) but also on the fuel in the vehicle and the time available. In such problems, imitation learning is challenging as decisions are not only dictated by the reward model but are also dependent on a cost constraint model. 
In this paper, we provide multiple methods that match expert distributions in the presence of trajectory cost constraints through: (a) Lagrangian-based method; (b) Meta-gradients to find a good trade-off between expected return and minimizing constraint violation; and (c) Cost-violation-based alternating gradient. We empirically show that leading imitation learning approaches imitate cost-constrained behaviors poorly and show that our meta-gradient-based approach achieves the best performance.", + "authors": "Qian Shao, Pradeep Varakantham, Shih-Fen Cheng", + "url": "https://openreview.net/forum?id=YUs5Rlyw8l", + "pdf": "https://openreview.net//pdf/8cc8449754eb2734f2faf57191601f35d896b226.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 252, + "title": "LCPPO: An Efficient Multi-agent Reinforcement Learning Algorithm on Complex Railway Network", + "abstract": "The complex railway network is a challenging real-world multi-agent system usually involving thousands of agents. Current planning methods heavily depend on expert knowledge to formulate solutions for specific cases and are therefore hardly generalized to new scenarios, on which Multi-agent Reinforcement Learning (MARL) draws significant attention. Despite some successful applications in multi-agent decision-making tasks, MARL is hard to be scaled to a large number of agents. This paper rethinks the curse of agents in the centralized-training-decentralized-execution paradigm and proposes a local-critic approach to address the issue. By combining the local critic with the PPO algorithm, we design a deep MARL algorithm denoted as Local Critic PPO (LCPPO). In experiments, we evaluate the effectiveness of LCPPO on a complex railway network benchmark, Flatland, with various numbers of agents. 
Noticeably, LCPPO shows prominent generalizability and robustness under the changes of environments.", + "authors": "Yuan Zhang, Umashankar Deekshith, Jianhong Wang, Joschka Boedecker", + "url": "https://openreview.net/forum?id=gylH3hNASm", + "pdf": "https://openreview.net//pdf/454925c51e7f7a931c7b7b91091e809c2c6400b0.pdf", + "primary_keywords": [ + "Applications", + "Learning", + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 292, + "title": "Logical Specifications-guided Dynamic Task Sampling for Reinforcement Learning Agents", + "abstract": "Reinforcement Learning (RL) has made significant strides in enabling artificial agents to learn diverse behaviors. However, learning an effective policy often requires a large number of environment interactions. To mitigate sample complexity issues, recent approaches have used high-level task specifications, such as Linear Temporal Logic (LTLf ) formulas or Reward Machines (RM), to guide the learning progress of the agent. In this work, we propose a novel approach, called Logical Specifications-guided Dynamic Task Sampling (LSTS), that learns a set of RL policies to guide an agent from an initial state to a goal state based on a high-level task specification, while minimizing the number of environmental interactions. Unlike previous work, LSTS does not assume information about the environment dynamics or the Reward Machine, and dynamically samples promising tasks that lead to successful goal policies. We evaluate LSTS on a gridworld and show that it achieves improved time-to-threshold performance on complex sequential decision-making problems compared to state-of-the-art RM and Automaton-guided RL baselines, such as Q-Learning for Reward Machines and Compositional RL from logical Specifications (DIRL). 
Moreover, we demonstrate that our method outperforms RM and Automaton-guided RL baselines in terms of sample-efficiency, both in a partially observable robotic task and in a continuous control robotic manipulation task.", + "authors": "Yash Shukla, Tanushree Burman, Abhishek Kulkarni, Robert Wright, Alvaro Velasquez, Jivko Sinapov", + "url": "https://openreview.net/forum?id=okLobjqfjx", + "pdf": "https://openreview.net//pdf/56f8c2764c2d5dfceecea2041cbc897153143f03.pdf", + "primary_keywords": [ + "Learning", + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "Representing the task objective using logical specifications and then employing Teacher-Student strategies and logical specifications to learn sample-efficient goal-directed RL policies when symbolic operators are absent.", + "type": "long" + }, + { + "number": 230, + "title": "Online Control of Adaptive Large Neighborhood Search Using Deep Reinforcement Learning", + "abstract": "The Adaptive Large Neighborhood Search (ALNS) algorithm has shown considerable success in solving combinatorial optimization problems (COPs). Nonetheless, the performance of ALNS relies on the proper configuration of its selection and acceptance parameters, which is known to be a complex and resource-intensive task. To address this, we introduce a Deep Reinforcement Learning (DRL) based approach called DR-ALNS that selects operators, adjusts parameters, and controls the acceptance criterion throughout the search. The proposed method aims to learn, based on the state of the search, to configure ALNS for the next iteration to yield more effective solutions for the given optimization problem. We evaluate the proposed method on an orienteering problem with stochastic weights and time windows, as presented in an IJCAI competition. 
The results show that our approach outperforms vanilla ALNS, ALNS tuned with Bayesian optimization, and two state-of-the-art DRL approaches that were the winning methods of the competition, achieving this with significantly fewer training observations. Furthermore, we demonstrate several good properties of the proposed DR-ALNS method: it is easily adapted to solve different routing problems, its learned policies perform consistently well across various instance sizes, and these policies can be directly applied to different problem variants. We will make our implementation code publicly available.", + "authors": "Robbert Reijnen, Yingqian Zhang, Hoong Chuin Lau, Zaharah Bukhsh", + "url": "https://openreview.net/forum?id=qMo2FWCznH", + "pdf": "https://openreview.net//pdf/6ba2ec421ed18ccccb2c4310747f4a56e723f3d4.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "The study introduces a Deep Reinforcement Learning approach to enhance the Adaptive Large Neighborhood Search algorithm by optimizing operator selection and online parameter configuration", + "type": "long" + }, + { + "number": 200, + "title": "Planning with a Learned Policy Basis to Optimally Solve Complex Tasks", + "abstract": "Conventional reinforcement learning (RL) methods can successfully solve a wide range of sequential decision problems. However, learning policies that can generalize predictably across multiple tasks in a setting with non-Markovian reward specifications is a challenging problem. We propose to use successor features to learn a set of local policies that each solves a well-defined subproblem. In a task described by a finite state automaton (FSA) that involves the same set of subproblems, the combination of these local policies can then be used to generate an optimal solution without additional learning. 
In contrast to other methods that combine local policies via planning, our method asymptotically attains global optimality, even in stochastic environments.", + "authors": "David Kuric, Guillermo Infante, Vicen\u00e7 G\u00f3mez, Anders Jonsson, Herke van Hoof", + "url": "https://openreview.net/forum?id=6N1uCtBhcL", + "pdf": "https://openreview.net//pdf/ccf3383273298718f4781f8b5088769d705c4c1b.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "", + "type": "long" + } + ] + }, + "Planning and Learning": { + "papers": [ + { + "number": 228, + "title": "Action Model Learning from Noisy Traces: a Probabilistic Approach", + "abstract": "We address the problem of learning planning domains from plan traces that are obtained by observing the environment states through noisy sensors. In such situations, approaches that assume correct traces are not applicable. We tackle the problem by designing a probabilistic graphical model where preconditions and effects of every planning domain operators, and traces\u2019 observations are modeled by random variables. Probabilistic inference conditioned by the observed traces allows our approach to derive a posterior probability of an atom being a precondition and/or an effect of an operator. Planning domains are obtained either by sampling or by applying the maximum a posteriori criterion. We compare our approach with a frequentist baseline and the currently available state-of-the-art approaches. We measure the performance of each method according to two criteria: reconstruction of the original planning domain and effectiveness in solving new planning problems of the same domain. Our experimental analysis shows that our approach learns action models that are more accurate w.r.t. 
state-of-the-art approaches, and strongly outperforms other approaches in generating models that are effective for solving new problems.", + "authors": "Leonardo Lamanna, Luciano Serafini", + "url": "https://openreview.net/forum?id=nSGl4pICyD", + "pdf": "https://openreview.net//pdf/8c4c7e09fdf9e2fb9485bf8c82d4ab775b8b71e0.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 41, + "title": "Neuro-symbolic Learning of Lifted Action Models from Visual Traces", + "abstract": "Model-based planners rely on action models to describe available actions in terms of their preconditions and effects. Nonetheless, manually encoding such models is challenging, especially in complex domains. Numerous methods have been proposed to learn action models from examples of plan execution traces. However, high-level information, such as state labels within traces, is often unavailable and needs to be inferred indirectly from raw observations. In this paper, we aim to learn lifted action models from visual traces --- sequences of image-action pairs depicting discrete successive trace steps. We present ROSAME, a differentiable neu$\\textbf{RO}$-$\\textbf{S}$ymbolic $\\textbf{A}$ction $\\textbf{M}$odel l$\\textbf{E}$arner that infers action models from traces consisting of probabilistic state predictions and actions. By combining ROSAME with a deep learning computer vision model, we create an end-to-end framework that jointly learns state predictions from images and infers symbolic action models. 
Experimental results demonstrate that our method succeeds in both tasks, using different visual state representations, with the learned action models often matching or even surpassing those created by humans.", + "authors": "Kai Xi, Stephen Gould, Sylvie Thiebaux", + "url": "https://openreview.net/forum?id=Kj86KzR4Xr", + "pdf": "https://openreview.net//pdf/3413a773a24ac5a73b874c296d5de66c78d0029d.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "We extract action models for planning domains from visual traces via probabilistic neuro-symbolic learning.", + "type": "long" + }, + { + "number": 239, + "title": "Safe Learning of PDDL Domains with Conditional Effects", + "abstract": "Powerful domain-independent planners have been developed to solve various types of planning problems. \nThese planners often require a model of the acting agent's actions, given in some planning domain description language. \nManually designing such an action model is a notoriously challenging task. \nAn alternative is to automatically learn action models from observation. \nSuch an action model is called safe if plans consistent with it are also consistent with the real, unknown action model. \nAlgorithms for learning such safe action models exist, yet they cannot handle domains with conditional or universal effects, which are common constructs in many planning problems. \nWe prove that learning non-trivial safe action models with conditional effects may require an exponential number of samples.\nThen, we identify reasonable assumptions under which such learning is tractable and propose Conditional-SAM, the first algorithm capable of doing so. \nWe analyze this Conditional-SAM theoretically and evaluate it experimentally. 
\nOur results show that the action models learned by Conditional-SAM can be used to solve perfectly most of the test set problems in most of the experimented domains.", + "authors": "Argaman Mordoch, Enrico Scala, Roni Stern, Brendan Juba", + "url": "https://openreview.net/forum?id=uNkn9kDBmy", + "pdf": "https://openreview.net//pdf/8f480d4c66c45caac6a0165a8565b82e3720fc16.pdf", + "primary_keywords": [ + "Learning", + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "A novel approach to learning action models with conditional and universal effects while maintaining safety.", + "type": "long" + }, + { + "number": 110, + "title": "Expressiveness of Graph Neural Networks in Planning Domains", + "abstract": "Graph Neural Networks (GNNs) have recently become the standard method of choice for learning with structured data, demonstrating particular promise in classical planning. Their inherent invariance under symmetries of the input graphs endows them with superior generalization capabilities compared to their symmetry-oblivious counterparts. However, this comes at the cost of limited expressive power. Notably, it is known that GNNs cannot distinguish between graphs that satisfy identical sentences of C$_2$ logic.\n \nTo leverage GNNs for learning policies in PDDL domains, one needs to encode the contextual representation of the planning states as graphs. The effectiveness of this encoding, coupled with a specific GNN architecture, hinges on the absence of indistinguishable states necessitating distinct actions. 
This paper provides a comprehensive theoretical and statistical exploration of such situations in PDDL domains across diverse natural encoding schemes and GNN models.", + "authors": "Rostislav Horcik, Gustav \u0160\u00edr", + "url": "https://openreview.net/forum?id=pKEkSAPSGJ", + "pdf": "https://openreview.net//pdf/e9904922aa775dbe34f630e7be4c46276d1fafba.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "The paper investigates the limits of expressive power of graph neural networks in policy learning for PDDL domains.", + "type": "long" + }, + { + "number": 54, + "title": "Return to Tradition: Learning Reliable Heuristics with Classical Machine Learning", + "abstract": "There has been a renewed interest in applying machine learning to planning due to recent developments in deep neural networks, with a lot of focus being placed on learning domain-dependent heuristics. However, current approaches for learning heuristics have yet to achieve competitive performance against domain-independent heuristics in several domains, and have poor overall performance. In this work, we construct novel graph representations of lifted planning tasks and use the WL algorithm to generate features from them. These features are used with classical machine learning methods such as Support Vector Machines and Gaussian Processes, which are both fast to train and evaluate. Our novel approach, WL-GOOSE, reliably learns heuristics from scratch and outperforms the $h^{\\text{FF}}$ heuristic. It also outperforms or ties with LAMA on 4 out of 10 domains. To our knowledge, the WL-GOOSE learned heuristics are the first to achieve these feats. 
Furthermore, we study the connections between our novel feature generation methods, previous theoretically flavoured learning architectures, and Description Logic features.", + "authors": "Dillon Ze Chen, Felipe Trevizan, Sylvie Thiebaux", + "url": "https://openreview.net/forum?id=zVO8ZRIg7Q", + "pdf": "https://openreview.net//pdf/18b5e97184829e76c3334bf643382ed4700b9d59.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "learned heuristics with classical machine learning that outperform $h^{\\text{FF}}$ on the learning track of the 2023 IPC, and theoretical connections to GNN and Description Logic features", + "type": "long" + }, + { + "number": 100, + "title": "Specifying Goals to Deep Neural Networks with Answer Set Programming", + "abstract": "Recently, methods such as DeepCubeA have used deep reinforcement learning to learn domain-specific heuristic functions in a largely domain-independent fashion. However, such methods either assume a predetermined goal or assume that goals will be given as fully-specified states. Therefore, specifying a set of goal states is not possible for learned heuristic functions while, on the other hand, the Planning Domain Definition Language (PDDL) allows for the specification of goal states using ground atoms in first-order logic. To address this issue, we introduce a method of training a heuristic function that estimates the distance between a given state and a set of goal states represented as a set of ground atoms in first-order logic. Furthermore, to allow for more expressive goal specification, we introduce techniques for specifying goals as answer set programs and using answer set solvers to discover sets of ground atoms that meet the specified goals. 
In our experiments with the Rubik's cube, sliding tile puzzles, and Sokoban, we show that we can specify and reach different goals without any need to re-train the heuristic function.", + "authors": "Forest Agostinelli, Rojina Panta, Vedant Khandelwal", + "url": "https://openreview.net/forum?id=x88vQjoZhK", + "pdf": "https://openreview.net//pdf/d207f49f42b69f2d01e3e7509ff4111d9ed479b5.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "Train DNN heuristic to estimate distance between state and set of goal states, use ASP to specify goals", + "type": "long" + } + ] + }, + "Planning Under Uncertainty": { + "papers": [ + { + "number": 173, + "title": "Addressing Myopic Constrained POMDP Planning with Recursive Dual Ascent", + "abstract": "Lagrangian-guided Monte Carlo tree search with global dual ascent has been applied to solve large constrained partially observable Markov decision processes (CPOMDPs) online. In this work, we demonstrate that these global dual parameters can lead to myopic action selection during exploration, ultimately leading to suboptimal decision making. To address this, we introduce history-dependent dual variables that guide local action selection and are optimized with recursive dual ascent. 
We empirically compare the performance of our approach on a motivating toy example and two large CPOMDPs, demonstrating improved exploration, and ultimately, safer outcomes.", + "authors": "Paula Stocco, Suhas Chundi, Arec Jamgochian, Mykel Kochenderfer", + "url": "https://openreview.net/forum?id=8jLtiqKe8j", + "pdf": "https://openreview.net//pdf/129e93912878e1cb7fda8a6b261155cd98b16fcc.pdf", + "primary_keywords": "", + "long": false, + "tldr": "", + "type": "short" + }, + { + "number": 362, + "title": "Non-Deterministic Planning for Hyperproperty Verification", + "abstract": "Non-deterministic planning aims to find a policy that achieves a given objective in an environment where actions have uncertain effects, and the agent - potentially - only observes parts of the current state.\nHyperproperties are properties that relate multiple paths of a system and can, e.g., capture security and information-flow policies.\nPopular logics for expressing hyperproperties - such as HyperLTL - extend LTL by offering selective quantification over executions of a system.\nIn this paper, we show that planning offers a powerful intermediate language for the automated verification of hyperproperties. 
\nConcretely, we present an algorithm that, given a HyperLTL verification problem, constructs a non-deterministic multi-agent planning instance (in the form of a QDec-POMDP) that, when admitting a plan, implies the satisfaction of the verification problem.\nWe show that for large fragments of HyperLTL, the resulting planning instance corresponds to a classical, FOND, or POND planning problem.\nWe implement our encoding in a prototype verification tool and report on encouraging experimental results using off-the-shelf FOND planners.", + "authors": "Raven Beutner, Bernd Finkbeiner", + "url": "https://openreview.net/forum?id=P4O7iEVUcm", + "pdf": "https://openreview.net//pdf/2127b2075de9f39149ea8f5180c3169620339f5e.pdf", + "primary_keywords": "", + "long": false, + "tldr": "We show that HyperLTL verification can be encoded into non-deterministic planning, yielding an effective verification algorithm that outperforms the SOTA.", + "type": "short" + }, + { + "number": 1005, + "title": "Plug'n Play Task-Level Autonomy for Robotics Using POMDPs and Probabilistic Programs", + "abstract": "We describe AOS, the first general-purpose system for model-based control of autonomous robots using AI planning that fully supports partial observability and noisy sensing. The AOS provides a code-based language for specifying a generative model of the system, making", + "authors": "Or Wertheim, Dan R. Suissa, Ronen I. Brafman", + "venue": "IEEE Robotics and Automation", + "url": "https://arxiv.org/abs/2207.09713", + "pdf": "https://arxiv.org/abs/2207.09713.pdf", + "primary_keywords": [], + "long": false, + "tldr": "", + "type": "prev" + }, + { + "number": 153, + "title": "Epistemic Exploration for Generalizable Planning and Learning in Non-Stationary Settings", + "abstract": "This paper introduces a new approach for continual planning and model learning in non-stationary stochastic environments expressed using relational representations. 
Such capabilities are essential for the deployment of sequential decision-making systems in the uncertain, constantly evolving real world. Working in such practical settings with unknown (and non-stationary) transition systems and changing tasks, the proposed framework models gaps in the agent's current state of knowledge and uses them to conduct focused, investigative explorations. Data collected using these explorations is used for learning generalizable probabilistic models for solving the current task despite continual changes in the environment dynamics. Empirical evaluations on several benchmark domains show that this approach significantly outperforms planning and RL baselines in terms of sample complexity in non-stationary settings. Theoretical results show that the system reverts to exhibit desirable convergence properties when stationarity holds.", + "authors": "Rushang Karia, Pulkit Verma, Alberto Speranzon, Siddharth Srivastava", + "url": "https://openreview.net/forum?id=NZaAq9YpId", + "pdf": "https://openreview.net//pdf/e0661e21dfc9553f43a4cc16897628bbc76c1f62.pdf", + "primary_keywords": [ + "Learning", + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "We propose an approach that integrates intelligent data gathering, planning and learning for efficient symbolic RL", + "type": "long" + }, + { + "number": 240, + "title": "Weak and Strong Reversibility of Non-Deterministic Actions: Universality and Uniformity", + "abstract": "Classical planning looks for a sequence of actions that transform the initial state of the environment into a goal state. Studying whether the effects of an action can be undone by a sequence of other actions, that is, action reversibility, is beneficial, for example, in determining whether an action is safe to apply. This paper deals with action reversibility of non-deterministic actions, i.e. actions whose application might result in different outcomes. 
Inspired by the established notions of weak and strong plans in non-deterministic (or FOND) planning, we define the notions of weak and strong reversibility for non-deterministic actions. We then focus on the universality and uniformity of action reversibility, that is, whether we can always undo all possible effects of the action by the same means (i.e. policy), or whether some of the effects can never be undone. We show how these classes of problems can be solved via classical or FOND planning and evaluate our approaches on FOND benchmark domains.", + "authors": "Jakub Med, Lukas Chrpa, Michael Morak, Wolfgang Faber", + "url": "https://openreview.net/forum?id=ABcIhcbIqH", + "pdf": "https://openreview.net//pdf/c04eb73259efd9aacbecdb09d8c2bd667d9e641c.pdf", + "primary_keywords": "", + "long": true, + "tldr": "It defines and investigates a (novel) notion of action reversibility in the area of non-deterministic planning. It provides several theoretical results for both general and specific action reversibility and proposes several methods for its detection.", + "type": "long" + }, + { + "number": 164, + "title": "A Counter-Example Based Approach to Probabilistic Conformant Planning", + "abstract": "This paper introduces a counter-example based approach for solving probabilistic conformant planning (PCP) problems. Our algorithm incrementally generates candidate plans and identifies counter-examples until it finds a plan for which the probability of success is above the specified threshold. We prove that the algorithm is sound and complete. We further propose a variation of our algorithm that uses hitting sets to accelerate the generation of candidate plans. 
Experimental results show that our planner is particularly suited for problems with a high probability threshold.", + "authors": "Xiaodi Zhang, Alban Grastien, Charles Gretton", + "url": "https://openreview.net/forum?id=gg527bL2Oi", + "pdf": "https://openreview.net//pdf/c72b9ac6eedd782fa23cad9af82370c8a99c1b3e.pdf", + "primary_keywords": [ + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "A counter-example based approach to solve probabilistic conformant planning problems.", + "type": "long" + }, + { + "number": 201, + "title": "Tightest Admissible Shortest Path", + "abstract": "The shortest path problem in graphs is fundamental to AI. Nearly all variants of the problem and relevant algorithms that solve them ignore edge-weight computation time and its common relation to weight uncertainty. This implies that taking these factors into consideration can potentially lead to a performance boost in relevant applications. Recently, a generalized framework for weighted directed graphs was suggested, where edge-weight can be computed (estimated) multiple times, at increasing accuracy and run-time expense. We build on this framework to introduce the problem of finding the tightest admissible shortest path (TASP); a path with the tightest suboptimality bound on the optimal cost. This is a generalization of the shortest path problem to bounded uncertainty, where edge-weight uncertainty can be traded for computational cost. We present a complete algorithm for solving TASP, with guarantees on solution quality. 
Empirical evaluation supports the effectiveness of this approach.", + "authors": "Eyal Weiss, Ariel Felner, Gal Kaminka", + "url": "https://openreview.net/forum?id=U8YeuU3lNq", + "pdf": "https://openreview.net//pdf/bb3737382254824271a72bc8d98cf1ffcabf8707.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + } + ] + }, + "Heuristics": { + "papers": [ + { + "number": 158, + "title": "Efficiently Computing Transitions in Cartesian Abstractions", + "abstract": "Counterexample-guided Cartesian abstraction refinement yields strong heuristics for optimal classical planning. The approach iteratively finds a new abstract solution, checks where it fails for the original task and refines the abstraction to avoid the same failure in subsequent iterations. The main bottleneck of this refinement loop is the memory needed for storing all abstract transitions. To address this issue, we introduce an algorithm that efficiently computes abstract transitions on demand. This drastically reduces the memory consumption and allows us to solve tasks during the refinement loop and during the search that were previously out of reach.", + "authors": "Jendrik Seipp", + "url": "https://openreview.net/forum?id=pFkoT7I6FK", + "pdf": "https://openreview.net//pdf/3e45622ec30adc6306e5ee1927a2cdd0e29d2121.pdf", + "primary_keywords": "", + "long": false, + "tldr": "By computing transitions in Cartesian abstractions on demand, we can trade a bit of speed for much lower memory usage.", + "type": "short" + }, + { + "number": 374, + "title": "Merging or Computing Saturated Cost Partitionings? A Merge Strategy for the Merge-and-Shrink Framework", + "abstract": "The merge-and-shrink framework is a powerful tool for computing\n abstraction heuristics for optimal classical planning. Merging is one\n of its name-giving transformations. It entails computing the product\n of two factors of a factored transition system. 
To decide which two\n factors to merge, the framework uses a merge strategy. While there\n exist many merge strategies, it is generally unclear what constitutes\n a strong merge strategy, and a previous analysis shows that there is\n still lots of room for improvement with existing merge strategies. In\n this paper, we devise a new scoring function for score-based merge\n strategies based on answering the question whether merging two\n factors has any benefits over computing saturated cost partitioning\n heuristics over the factors instead. Our experimental evaluation\n shows that our new merge strategy achieves state-of-the-art\n performance on IPC benchmarks.", + "authors": "Silvan Sievers, Thomas Keller, Gabriele R\u00f6ger", + "url": "https://openreview.net/forum?id=eEaLP0iejz", + "pdf": "https://openreview.net//pdf/8780feec73638e880a9182d32d7e42f00714738d.pdf", + "primary_keywords": "", + "long": false, + "tldr": "We present a new state-of-the-art merge strategy.", + "type": "short" + }, + { + "number": 243, + "title": "Versatile Cost Partitioning with Exact Sensitivity Analysis", + "abstract": "Saturated post-hoc optimization is a powerful method for computing admissible heuristics for optimal classical planning. The approach solves a linear program (LP) for each state encountered during the search, which is computationally demanding. In this paper, we theoretically and empirically analyze to which extent we can reuse an LP solution of one state for another. We introduce a novel sensitivity analysis that can exactly characterize the set of states for which a unique LP solution is optimal. Furthermore, we identify two properties of the underlying LPs that affect reusability. Finally, we introduce an algorithm that optimizes LP solutions to generalize well to other states. 
Our new algorithms significantly reduce the number of necessary LP computations.", + "authors": "Paul H\u00f6ft, David Speck, Florian Pommerening, Jendrik Seipp", + "url": "https://openreview.net/forum?id=GZrgOvNGJx", + "pdf": "https://openreview.net//pdf/abaa4de774819bc07600640e44287065544d964a.pdf", + "primary_keywords": "", + "long": false, + "tldr": "", + "type": "short" + }, + { + "number": 108, + "title": "Abstraction Heuristics for Factored Tasks", + "abstract": "One of the strongest approaches for optimal classical planning is A$^*$ search with heuristics based on abstractions of the planning task. Abstraction heuristics are well studied in planning formalisms without conditional effects such as SAS$^+$. However, conditional effects are crucial to model many planning tasks compactly. In this paper, we focus on *factored* tasks which allow a specific form of conditional effect, where effects on variable $x$ can only depend on the value of $x$. We generalize projections, domain abstractions, Cartesian abstractions and the counterexample-guided abstraction refinement method to this formalism. While merge-and-shrink already covers factored task in theory, we provide an implementation that does so. In our experiments, we compare these abstraction-based heuristics to other heuristics supporting conditional effects, as well as symbolic search. On our new benchmark set of factored tasks, pattern database heuristics solve the most problems, followed by symbolic approaches on par with domain abstractions. The more general Cartesian abstractions fall behind in terms of coverage but usually solve problems the fastest among all tested approaches. 
The generality of merge-and-shrink abstractions does not seem to be beneficial for these factored tasks.", + "authors": "Clemens B\u00fcchner, Patrick Ferber, Jendrik Seipp, Malte Helmert", + "url": "https://openreview.net/forum?id=L0RdHrLWCY", + "pdf": "https://openreview.net//pdf/6fa710601eff873e88b612fccad31d4ded3ddf61.pdf", + "primary_keywords": "", + "long": true, + "tldr": "We extend abstraction heuristics to a planning formalism with certain kinds of conditional effects.", + "type": "long" + }, + { + "number": 123, + "title": "Towards Feasible Higher-Dimensional Potential Heuristics", + "abstract": "Potential heuristics assign numerical values\n(potentials) to state features, where each feature is a conjunction of\nfacts. It was previously shown that the informativeness of potential\nheuristics can be significantly improved\nby considering complex features,\nbut computing potentials over all pairs of facts\nis already too costly in practice.\nIn this paper, we investigate whether using just a few high-dimensional\nfeatures instead of all conjunctions up to dimension $n$ can result in\nimproved heuristics while keeping the computational cost at bay. We focus on (a)\nestablishing a framework for studying this kind of potential heuristics, and\n(b) whether it is reasonable to expect improvement with just a few\nconjunctions. 
For (a), we propose two compilations that encode each\nconjunction explicitly as a new fact so that we can compute\npotentials over conjunctions in the original task as one-dimensional\npotentials in the compilation.\nRegarding (b), we provide evidence that informativeness of potential\nheuristics can be significantly increased with a small set of conjunctions,\nand these improvements have positive impact on the number of solved tasks.", + "authors": "Daniel Fi\u0161er, Marcel Steinmetz", + "url": "https://openreview.net/forum?id=gjMdaoliyG", + "pdf": "https://openreview.net//pdf/b86de678ae12ae54e9e61d0bf2893e4a2850e48f.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 265, + "title": "Transition Landmarks from Abstraction Cuts", + "abstract": "We introduce *transition-counting constraints* as a principled tool to formalize constraints that must hold in every solution of a transition system. We show how to obtain *transition landmark* constraints from *abstraction cuts*. Transition landmarks dominate operator landmarks in theory but require solving a linear program that is prohibitively large in practice. We compare different approximations that replace transition-counting variables with more compact operator-counting variables. These are based on projections to operator landmarks and further relaxations. For one important special case, we show that the projection is lossless even for integer-valued variables. We finally discuss efficient data structures to derive cuts from abstractions and store them in a way that avoids repeated computation in every state. 
We compare the resulting heuristics and other heuristics both theoretically and on the IPC benchmarks.", + "authors": "Florian Pommerening, Clemens B\u00fcchner, Thomas Keller", + "url": "https://openreview.net/forum?id=AK9ACCp8fI", + "pdf": "https://openreview.net//pdf/9611b8d226b56aff84830e5950b33754e82de785.pdf", + "primary_keywords": "", + "long": true, + "tldr": "Transition-counting constraints are a principled tool to formalize constraints and can be used to express landmarks obtained from cuts in abstractions.", + "type": "long" + }, + { + "number": 6, + "title": "More Flexible Proximity Wildcards Path Planning with Compressed Path Databases", + "abstract": "Grid-based path planning is one of the familiar issues in AI, and a popular topic in application areas such as computer games and robotics. Compressed Path Databases (CPDs) are recognized as a state-of-the-art method for grid-based path planning. It is able to find an optimal path extremely fast without a state-space search. In recent years, researchers tend to focus on improving CPDs from reducing CPD size or improving lookup performance. Among various methods, proximity wildcards is one of the most proven improvements in reducing the size of CPD. However, its proximity area is significantly restricted by complex terrain, which has more significant impacts on pathfinding efficiency and generates more additional costs. In this paper we enhance CPDs from the perspective of improving search efficiency and reducing search costs. Our work is to break the limitation between length and width of the proximity area, and adopt more flexible approaches to avoid obstacles, so as to reduce its impact on the proximity area and improve the search efficiency. Experiments performed on the benchmarks from Grid-Based Path Planning Competition (GPPC) demonstrate that the two proposed methods can effectively improve search efficiency and reduce the search costs by 2-3 orders of magnitude. 
Remarkably, our methods can further reduce storage costs, and improve compression capability of CPDs simultaneously.", + "authors": "Xi Chen, Yue Zhang, Yonggang Zhang", + "url": "https://openreview.net/forum?id=Ho5f9bMplo", + "pdf": "https://openreview.net//pdf/df58be0cf1a58fe173028c0ccc9449092169639d.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + } + ] + }, + "LLMs and Neural Action Policies": { + "papers": [ + { + "number": 294, + "title": "Large Language Models as Planning Domain Generators", + "abstract": "The creation of planning models, and in particular domain models, is among the last bastions of tasks that require extensive manual labor in AI planning; it is desirable to simplify this process for the sake of making planning more accessible. To this end, we investigate whether large language models (LLMs) can be used to generate planning domain models from textual descriptions. We propose a novel task for this as well as a means of automated evaluation for generated domains by comparing the sets of plans for domain instances. Finally, we perform an empirical analysis of 7 large language models, including coding and chat models across 9 different planning domains. 
Our results show that LLMs, particularly larger ones, exhibit some level of proficiency in generating correct planning domains from natural language descriptions.", + "authors": "James Oswald, Kavitha Srinivas, Harsha Kokel, Junkyu Lee, Michael Katz, Shirin Sohrabi", + "url": "https://openreview.net/forum?id=C88wQIv0aJ", + "pdf": "https://openreview.net//pdf/3c39147a530076c7d7c7fd522d4cc04efd35ee93.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 245, + "title": "On the Prospects of Incorporating Large Language Models (LLMs) in Automated Planning and Scheduling (APS)", + "abstract": "Automated planning is among the growing areas in Artificial Intelligence (AI) where mention of LLMs has gained popularity.\nBased on a comprehensive review of 126 papers, this paper investigates eight categories based on the unique applications of LLMs in addressing various aspects of planning problems: language translation, plan generation, model construction, multi-agent planning, interactive planning, heuristics optimization, tool integration, and brain-inspired planning. For each category, we articulate the issues considered and existing gaps. A critical insight resulting from our review is that the true potential of LLMs unfolds when they are integrated with traditional symbolic planners, pointing towards a promising neuro-symbolic approach. This approach effectively combines the generative aspects of LLMs with the precision of classical planning methods. By synthesizing insights from existing literature, we underline the potential of this integration to address complex planning challenges. 
Our goal is to encourage the ICAPS community to recognize the complementary strengths of LLMs and symbolic planners, advocating for a direction in automated planning that leverages these synergistic capabilities to develop more advanced and intelligent planning systems.", + "authors": "Vishal Pallagani, Bharath Chandra Muppasani, Kaushik Roy, Francesco Fabiano, Andrea Loreggia, Keerthiram Murugesan, Biplav Srivastava, Francesca Rossi, Lior Horesh, Amit P. Sheth", + "url": "https://openreview.net/forum?id=BLsvMLvuhL", + "pdf": "https://openreview.net//pdf/65246a5aff16e88555d074f7d9d481513f4b33d8.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "This position paper investigates the enhancement of automated planning by Large Language Models (LLMs) across eight distinct domains.", + "type": "long" + }, + { + "number": 315, + "title": "SayNav: Grounding Large Language Models for Dynamic Planning to Navigation in New Environments", + "abstract": "Semantic reasoning and dynamic planning capabilities are crucial for an autonomous agent to perform complex navigation tasks in unknown environments. It requires a large amount of common-sense knowledge, that humans possess, to succeed in these tasks. We present SayNav, a new approach that leverages human knowledge from Large Language Models (LLMs) for efficient generalization to complex navigation tasks in unknown large-scale environments. SayNav uses a novel grounding mechanism, that incrementally builds a 3D scene graph of the explored environment as inputs to LLMs, for generating feasible and contextually appropriate high-level plans for navigation. The LLM-generated plan is then executed by a pre-trained low-level planner, that treats each planned step as a short-distance point-goal navigation sub-task. SayNav dynamically generates step-by-step instructions during navigation and continuously refines future steps based on newly perceived information. 
We evaluate SayNav on multi-object navigation (MultiON) task, that requires the agent to utilize a massive amount of human knowledge to efficiently search multiple different objects in an unknown environment. We also introduce a benchmark dataset for MultiON task employing ProcTHOR framework that provides large photo-realistic indoor environments with variety of objects. SayNav achieves state-of-the-art results and even outperforms an oracle based baseline with strong ground-truth assumptions by more than 8\\% in terms of success rate, highlighting its ability to generate dynamic plans for successfully locating objects in large-scale new environments.", + "authors": "Abhinav Rajvanshi, Karan Sikka, Xiao Lin, Bhoram Lee, Han-pang Chiu, Alvaro Velasquez", + "url": "https://openreview.net/forum?id=h3PBJcr18A", + "pdf": "https://openreview.net//pdf/940910fd0e04aed08a28f94f31c63166f88d1b16.pdf", + "primary_keywords": [ + "Learning", + "Robotics", + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 1002, + "title": "When Prolog Meets Generative Models: a New Approach for Managing Knowledge and Planning in Robotic Applications", + "abstract": "In this paper, we propose a robot oriented knowledge representation system based on the use of the Prolog language. Our framework hinges on a special organisation of Knowledge Base (KB) that enables: 1) its efficient population from natural language texts using semi-automated procedures based on Large Language Models (LLMs); 2) the seamless generation of temporal parallel plans for multi-robot systems through a sequence of transformations; 3) the automated translation of the plan into an executable formalism. 
The framework is supported by a set of open source tools and its functionality is shown with a realistic application.", + "authors": "Enrico Saccon, Ahmet Tikna, Davide De Martini, Edoardo Lamon, Luigi Palopoli, Marco Roveri", + "venue": "ICRA", + "url": "https://arxiv.org/abs/2309.15049", + "pdf": "https://arxiv.org/pdf/2309.15049.pdf", + "primary_keywords": [ + "Knowledge Representation/Engineering" + ], + "long": false, + "tldr": "", + "type": "prev" + }, + { + "number": 1004, + "title": "Integrating Action Knowledge and LLMs for Task Planning and Situation Handling in Open Worlds", + "abstract": "Task planning systems have been developed to help robots use human knowledge (about actions) to complete long-horizon tasks. Most of them have been developed for \u201cclosed worlds\u201d while assuming the robot is provided with complete world knowledge. However, the real world is generally open, and the robots frequently encounter unforeseen situations that can potentially break the planner\u2019s completeness. Could we leverage the recent advances on pre-trained Large Language Models (LLMs) to enable classical planning systems to deal with novel situations? This paper introduces a novel framework, called COWP, for open-world task planning and situation handling. COWP dynamically augments the robot\u2019s action knowledge, including the preconditions and effects of actions, with task-oriented commonsense knowledge. COWP embraces the openness from LLMs, and is grounded to specific domains via action knowledge. For systematic evaluations, we collected a dataset that includes 1085 execution-time situations. Each situation corresponds to a state instance wherein a robot is potentially unable to complete a task using a solution that normally works. Experimental results show that our approach outperforms competitive baselines from the literature in the success rate of service tasks. Additionally, we have demonstrated COWP using a mobile manipulator. 
Supplementary materials are available at: https://cowplanning.github.io/", + "authors": "Yan Ding, Xiaohan Zhang, Saeid Amiri, Nieqing Cao, Hao Yang, Andy Kaminski, Chad Esselink, Shiqi Zhang", + "venue": "Autonomous Robotics", + "url": "https://link.springer.com/article/10.1007/s10514-023-10133-5", + "pdf": "https://arxiv.org/pdf/2305.17590.pdf", + "primary_keywords": [], + "long": false, + "tldr": "", + "type": "prev" + }, + { + "number": 367, + "title": "Learning General Policies for Planning through GPT Models", + "abstract": "Transformer-based architectures, such as T5, BERT and GPT, have shown revolutionary capabilities in Natural Language Processing. Several studies showed that deep learning models using these architectures not only possess a remarkable linguistic knowledge, but they also exhibit forms of factual knowledge, common sense, and even programming skills. However, the scientific community still debates about their reasoning capabilities, which have been recently tested in the context of automated AI planning; the literature presents mixed results, and the prevailing view is that current transformer-based models may not be adequate for planning.\nIn this paper, we address this challenge differently. We introduce a GPT-based model customised for planning (PlanGPT) to learn a general policy for classical planning by training the model from scratch with a dataset of solved planning instances. Once PlanGPT has been trained for a domain, it can be used to generate a solution plan for an input problem instance in that domain. Our training procedure exploits automated planning knowledge to enhance the performance of the trained model. We build and evaluate our GPT model with several planning domains, and we compare its performance w.r.t. 
other recent deep learning techniques for generalised planning, demonstrating the effectiveness of the proposed approach.", + "authors": "Nicholas Rossetti, Massimiliano Tummolo, Alfonso Gerevini, Luca Putelli, Ivan Serina, Mattia Chiari, Matteo Olivato", + "url": "https://openreview.net/forum?id=yB8oafJ8bu", + "pdf": "https://openreview.net//pdf/7577c812482957a467be55e6919acb2d73107ca9.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "The paper proposes an approach based on GPT and a system implementing it to learn general policies for automated planning.", + "type": "long" + }, + { + "number": 17, + "title": "Neural Action Policy Safety Verification: Applicablity Filtering", + "abstract": "Neural networks (NN) are an increasingly important representation of action policies pi. Applicability filtering is a commonly used practice in this context, restricting the action selection in pi to only applicable actions. Policy predicate abstraction (PPA) has recently been introduced to verify safety of neural pi, through over-approximating the state space subgraph induced by pi. Thus far however, PPA does not permit applicability filtering, which is challenging due to the additional constraints that need to be taken into account. Here we overcome that limitation, through a range of algorithmic enhancements. 
In our experiments, our enhancements achieve several orders of magnitude speed-up over a baseline implementation, bringing PPA with applicability filtering close to the performance of PPA without such filtering.", + "authors": "Marcel Vinzent, J\u00f6rg Hoffmann", + "url": "https://openreview.net/forum?id=jp5mNUcJTH", + "pdf": "https://openreview.net//pdf/08fabaee6ff6973a58c0b552c706716d3797c271.pdf", + "primary_keywords": "", + "long": false, + "tldr": "", + "type": "short" + } + ] + }, + "Robotics": { + "papers": [ + { + "number": 87, + "title": "Accelerating Search-Based Planning for Multi-Robot Manipulation by Leveraging Online-Generated Experiences", + "abstract": "An exciting frontier in robotic manipulation is the use of multiple arms at once. \nHowever, planning concurrent motions is a challenging task using current methods. The high-dimensional composite state space renders many well-known motion planning algorithms intractable.\nRecently, multi-agent path finding (MAPF) algorithms have shown promise in discrete 2D domains, providing rigorous guarantees. However, widely used conflict-based methods in MAPF assume an efficient single-agent motion planner. This poses challenges in adapting them to manipulation cases where this assumption does not hold, due to the high dimensionality of configuration spaces and the computational bottlenecks associated with collision checking.\nTo this end, we propose an approach for accelerating conflict-based search algorithms by leveraging their repetitive and incremental nature -- making them tractable for use in complex scenarios involving multi-arm coordination in obstacle-laden environments. 
\nWe show that our method preserves completeness and bounded sub-optimality guarantees, and demonstrate its practical efficacy through a set of experiments with up to 10 robotic arms.", + "authors": "Yorai Shaoul, Itamar Mishani, Maxim Likhachev, Jiaoyang Li", + "url": "https://openreview.net/forum?id=aq8LOMsYgc", + "pdf": "https://openreview.net//pdf/37bb74bb7b70af0207c19c462942c11db8169e7e.pdf", + "primary_keywords": [ + "Robotics", + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 162, + "title": "Combined Task and Motion Planning Via Sketch Decompositions", + "abstract": "The challenge in combined task and motion planning (TAMP) is the effective integration of a search over a combinatorial space, usually carried out by a task planner, and a search over a continuous configuration space, carried out by a motion planner. Using motion planners for testing the feasibility of task plans and filling out the details is not effective because it makes the geometrical constraints play a passive role. This work introduces a new interleaved approach for integrating the two dimensions of TAMP that makes use of sketches, a recent simple but powerful language for expressing the decomposition of problems into subproblems. A sketch has width 1 if it decomposes the problem into subproblems that can be solved greedily in linear time. In the paper, a general sketch is introduced for several classes of TAMP problems which has width 1 under suitable assumptions. While sketch decompositions have been developed for classical planning, they offer two important benefits in the context of TAMP. First, when a task plan is found to be unfeasible due to the geometric constraints, the combinatorial search resumes in a specific subproblem. Second, the sampling of object configurations is not done once, globally, at the start of the search, but locally, at the start of each subproblem. 
Optimizations of this basic setting are also considered and experimental results over existing and new pick-and-place benchmarks are reported.", + "authors": "Mag\u00ed Dalmau-Moreno, Nestor Garcia, Vicen\u00e7 G\u00f3mez, Hector Geffner", + "url": "https://openreview.net/forum?id=mnfzSF746z", + "pdf": "https://openreview.net//pdf/87b16c17a38378f96067d05e447704b63955f905.pdf", + "primary_keywords": [ + "Robotics" + ], + "long": true, + "tldr": "Sketch decomposition boost solving complex Task and Motion Planning problems", + "type": "long" + }, + { + "number": 236, + "title": "Learning Quadruped Locomotion Policies using Logical Rules", + "abstract": "Quadruped animals are capable of exhibiting a diverse range of locomotion gaits. While progress has been made in demonstrating such gaits on robots, current methods rely on motion priors, dynamics models, or other forms of extensive manual efforts. People can use natural language to describe dance moves. Could one use a formal language to specify quadruped gaits? To this end, we aim to enable easy gait specification and efficient policy learning. Leveraging Reward Machines (RMs) for high-level gait specification over foot contacts, our approach is called RM-based Locomotion Learning (RMLL), and supports adjusting gait frequency at execution time. Gait specification is enabled through the use of a few logical rules per gait (e.g., alternate between moving front feet and back feet) and does not require labor-intensive motion priors. Experimental results in simulation highlight the diversity of learned gaits (including two novel gaits), their energy consumption and stability across different terrains, and the superior sample-efficiency when compared to baselines. 
We also demonstrate these learned policies with a real quadruped robot.", + "authors": "David DeFazio, Yohei Hayamizu, Shiqi Zhang", + "url": "https://openreview.net/forum?id=9ZerDaRxp3", + "pdf": "https://openreview.net//pdf/48cfb33fec036195874a30bf641b6a209d8dbcb6.pdf", + "primary_keywords": [ + "Learning", + "Robotics" + ], + "long": true, + "tldr": "Through reward machines, we specify and learn a diverse set of quadruped locomotion gaits which we demonstrate on hardware", + "type": "long" + }, + { + "number": 167, + "title": "Multi-Robot Connected Fermat Spiral Coverage", + "abstract": "We introduce Multi-Robot Connected Fermat Spiral (MCFS), a novel algorithmic framework for Multi-Robot Coverage Path Planning (MCPP) that adapts Coverage Fermat Spiral (CFS) from the computer graphics community to multi-robot coordination for the first time. MCFS uniquely enables the orchestration of multiple robots to generate coverage paths that contour around arbitrarily shaped obstacles, a feature notably lacking in traditional methods. Our framework not only enhances area coverage and optimizes task performance, particularly in terms of makespan, for workspaces rich in irregular obstacles but also addresses the challenges of path continuity and curvature critical for non-holonomic robots by generating smooth paths without decomposing the workspace. MCFS solves MCPP by constructing a graph of isolines and transforming MCPP into a combinatorial optimization problem, aiming to minimize the makespan while covering all vertices. \nOur contributions include developing a unified CFS version for scalable and adaptable MCPP, extending it to MCPP with novel optimization techniques for cost reduction and path continuity and smoothness, and demonstrating through extensive experiments that MCFS outperforms existing MCPP methods in makespan, path curvature, coverage ratio, and overlapping ratio. 
Our research marks a significant step in MCPP, showcasing the fusion of computer graphics and automated planning principles to advance the capabilities of multi-robot systems in complex environments.", + "authors": "Jingtao Tang, Hang Ma", + "url": "https://openreview.net/forum?id=RQXCs02X7I", + "pdf": "https://openreview.net//pdf/99c206441b8084302c99d6c2fa5058ab3c6d4a2d.pdf", + "primary_keywords": [ + "Robotics", + "Multi-Agent Planning" + ], + "long": true, + "tldr": "An algorithmic framework tailored for challenging multi-robot coverage path planning tasks, optimizing both makespan and path smoothness.", + "type": "long" + }, + { + "number": 1001, + "title": "Effort Level Search in Infinite Completion Trees with Application to Task-and-Motion Planning", + "abstract": "Solving a Task-and-Motion Planning (TAMP) problem can be represented as a sequential (meta-) decision process, where early decisions concern the skeleton (sequence of logic actions) and later decisions concern what to compute for such skeletons (e.g., action parameters, bounds, RRT paths, or full optimal manipulation trajectories). We consider the general problem of how to schedule compute effort in such hierarchical solution processes. More specifically, we introduce infinite completion trees as a problem formalization, where before we can expand or evaluate a node, we have to solve a preemptible computational sub-problem of a priori unknown compute effort. Infinite branchings represent an infinite choice of random initializations of computational sub-problems. Decision making in such trees means to decide on where to invest compute or where to widen a branch. We propose a heuristic to balance branching width and compute depth using polynomial level sets. We show completeness of the resulting solver and that a round robin baseline strategy used previously for TAMP becomes a special case. 
Experiments confirm the robustness and efficiency of the method on problems including stochastic bandits and a suite of TAMP problems, and compare our approach to a round robin baseline. An appendix comparing the framework to bandit methods and proposing a corresponding tree policy version is found on the supplementary webpage: https://www.user.tu-berlin.de/mtoussai/24-CompletionTrees/.", + "authors": "Marc Toussaint, Joaquim Ortiz-Haro, Valentin N. Hartmann, Erez Karpas, Wolfgang H\u00f6nig", + "venue": "ICRA", + "url": "https://www.user.tu-berlin.de/mtoussai/24-CompletionTrees/", + "pdf": "https://www.user.tu-berlin.de/mtoussai/24-CompletionTrees/24-toussaint-ICRA.pdf", + "primary_keywords": [ + "Robotics" + ], + "long": false, + "tldr": "TAMP problems are typically solved in steps, by solving a series of sub-problems. We address the decision problem of where to invest compute when searching over possible sequences of sub-problems.", + "type": "prev" + }, + { + "number": 1006, + "title": "Right Place, Right Time: Proactive Multi-Robot Task Allocation Under Spatiotemporal Uncertainty", + "abstract": "For many multi-robot problems, tasks are announced during execution, where task announcement times and locations are uncertain. To synthesise multi-robot behaviour that is robust to early announcements and unexpected delays, multi-robot task allocation methods must explicitly model the stochastic processes that govern task announcement. In this paper, we model task announcement using continuous-time Markov chains which predict when and where tasks will be announced. We then present a task allocation framework which uses the continuous-time Markov chains to allocate tasks proactively, such that robots are near or at the task location upon its announcement. Our method seeks to minimise the expected total waiting duration for each task, i.e. the duration between task announcement and a robot beginning to service the task. 
Our framework can be applied to any multi-robot task allocation problem where robots complete spatiotemporal tasks which are announced stochastically. We demonstrate the efficacy of our approach in simulation, where we outperform baselines which do not allocate tasks proactively, or do not fully exploit our task announcement models.", + "authors": "Charlie Street, Bruno Lacerda, Manual M\u00fchlig, Nick Hawes", + "venue": "JAIR", + "url": "https://www.jair.org/index.php/jair/article/view/15057", + "pdf": "https://dl.acm.org/doi/pdf/10.1613/jair.1.15057", + "primary_keywords": [ + "Temporal Planning" + ], + "long": false, + "tldr": "", + "type": "prev" + }, + { + "number": 1008, + "title": "Solving Multi-Agent Target Assignment and Path Finding with a Single Constraint Tree", + "abstract": "The Combined Target-Assignment and Path- Finding (TAPF) problem requires simultaneously assigning targets to agents and planning collision-free paths for them from their start locations to their assigned targets. As a leading approach to addressing TAPF, Conflict-Based Search with Target Assignment (CBS-TA) leverages K-best target assignments to create multiple search trees and Conflict-Based Search (CBS) to resolve collisions in each tree. While CBS- TA finds optimal solutions, it faces scalability challenges due to the duplicated collision resolution in multiple trees and the expensive computation of K-best assignments. We introduce Incremental Target Assignment CBS (ITA-CBS) to bypass these two computational bottlenecks. ITA-CBS generates only a single search tree and avoids computing K-best assignments by incrementally computing new 1-best assignments during the search. 
We show that ITA-CBS, in theory, is guaranteed to find optimal solutions and, in practice, runs faster than CBS-TA in 96.1% of 6,334 test cases.", + "authors": "Yimin Tang, Zhongqiang Ren, Jiaoyang Li, Katia Sycara", + "venue": "MRS", + "url": "https://arxiv.org/abs/2307.00663", + "pdf": "https://arxiv.org/abs/2307.00663.pdf", + "primary_keywords": [], + "long": false, + "tldr": "", + "type": "prev" + } + ] + }, + "Robots and Space": { + "papers": [ + { + "number": 320, + "title": "SLAMuZero: Plan and learn to Map for Joint SLAM and Navigation", + "abstract": "MuZero has demonstrated remarkable performance in board and video games where Monte Carlo tree search (MCTS) method is utilized to learn and adapt to different game environments. This paper leverages the strength of MuZero to enhance agents\u2019 planning capability for joint active simultaneous localization and mapping (SLAM) and navigation tasks, which require an agent to navigate an unknown environment while simultaneously constructing a map and localizing itself. We propose SLAMuZero, a novel approach for joint SLAM and navigation, which employs a search process that uses an explicit encoder-decoder architecture for mapping, followed by a prediction function to evaluate policy and value based on the generated map. SLAMuZero outperforms the state-of-the-art baseline and significantly reduces training time, underscoring the efficiency of our approach. 
Additionally, we develop a new open source library for implementing SLAMuZero, which is a flexible and modular toolkit for researchers and practitioners.", + "authors": "Bowen Fang, Xu Chen, Zhengkun Pan, Xuan Di", + "url": "https://openreview.net/forum?id=IpvczRU4Lo", + "pdf": "https://openreview.net//pdf/ff5460f71df12e6fe45370c07d9a06b62a30a3ba.pdf", + "primary_keywords": [ + "Applications", + "Learning" + ], + "long": false, + "tldr": "", + "type": "short" + }, + { + "number": 1003, + "title": "Dynamic Targeting to Improve Earth Science Missions", + "abstract": "Dynamic targeting (DT) is an emerging concept in which data from a lookahead instrument are used to intelligently reconfigure and point a primary instrument to enhance science return. For example, in the smart ice hunting radar (Smart Ice Cloud Sensing project), a forward-looking radiometer is used to detect deep convective ice storms, which are then targeted using a radar. In other concepts, forward-looking sensors are used to detect clouds so that a primary sensor can avoid them. To this end, we have developed several algorithms from operations research and an artificial intelligence/heuristic search to point/reconfigure the dynamic instrument. We present simulation studies of DT for these concepts and benchmark these algorithms to show that DT is a powerful tool with the potential to significantly improve instrument science yield.", + "authors": "Alberto Candela, Jason Swope, Steve A. Chien", + "venue": "Aerospace Research Central", + "url": "https://arc.aiaa.org/doi/10.2514/1.I011233", + "pdf": "candela.pdf", + "primary_keywords": [ + "Applications" + ], + "long": false, + "tldr": "", + "type": "prev" + }, + { + "number": 1007, + "title": "EELS: Autonomous snake-like robot with task and motion planning capabilities for ice world exploration", + "abstract": "Ice worlds are at the forefront of astrobiological interest because of the evidence of subsurface oceans. 
Enceladus in particular is unique among the icy moons because there are known vent systems that are likely connected to a subsurface ocean, through which the ocean water is ejected to space. An existing study has shown that sending small robots into the vents and directly sampling the ocean water is likely possible. To enable such a mission, NASA\u2019s Jet Propulsion Laboratory is developing a snake-like robot called Exobiology Extant Life Surveyor (EELS) that can navigate Enceladus\u2019 extreme surface and descend an erupting vent to capture unaltered liquid samples and potentially reach the ocean. However, navigating to and through Enceladus\u2019 environment is challenging: Because of the limitations of existing orbital reconnaissance, there is substantial uncertainty with respect to its geometry and the physical properties of the surface/vents; communication is limited, which requires highly autonomous robots to execute the mission with limited human supervision. Here, we provide an overview of the EELS project and its development effort to create a risk-aware autonomous robot to navigate these extreme ice terrains/environments. We describe the robot\u2019s architecture and the technical challenges to navigate and sense the icy environment safely and effectively. We focus on the challenges related to surface mobility, task and motion planning under uncertainty, and risk quantification. We provide initial results on mobility and risk-aware task and motion planning from field tests and simulated scenarios.", + "authors": "T. S. Vaquero, G. Daddi, R. Thakker, M. Paton, A. Jasour, M. P. Strub, R. M. Swan, R. Royce, M. Gildner, P. Tosi, M. Veismann, P. Gavrilov, E. Marteau, J. Bowkett, D. Loret de Mola Lemus, Y. Nakka, B. Hockman, A. Orekhov, T. D. Hasseler, C. Leake, B. Nuernberger, P. Proen\u00e7a, W. Reid, W. Talbot, N. Georgiev, T. Pailevanian, A. Archanian, E. Ambrose, J. Jasper, R. Etheredge, C. Roman, D. Levine, K. Otsu, S. Yearicks, H. Melikyan, R. R. 
Rieber, K. Carpenter, J. Nash, A. Jain, L. Shiraishi, M. Robinson, M. Travers, H. Choset, J. Burdick, A. Gardner, M. Cable, M. Ingham, M. Ono", + "venue": "Science Robotics Journal", + "url": "https://www.science.org/doi/10.1126/scirobotics.adh8332", + "pdf": "https://www.science.org/doi/epdf/10.1126/scirobotics.adh8332", + "primary_keywords": [ + "Applications" + ], + "long": false, + "tldr": "", + "type": "prev" + }, + { + "number": 20, + "title": "Rethinking Mutual Information for Language Conditioned Skill Discovery on Imitation Learning", + "abstract": "Language-conditioned robot behavior plays a vital role in executing complex tasks by associating human commands or instructions with perception and actions. The ability to compose long-horizon tasks based on unconstrained language instructions necessitates the acquisition of a diverse set of general-purpose skills.However, acquiring inherent primitive skills in a coupled and long-horizon environment without external rewards or human supervision presents significant challenges. In this paper, we evaluate the relationship between skills and language instructions from a mathematical perspective, employing two forms of mutual information within the framework of language-conditioned policy learning.To maximize the mutual information between language and skills in an unsupervised manner, we propose an end-to-end imitation learning approach known as Language Conditioned Skill Discovery (LCSD). Specifically, we utilize vector quantization to learn discrete latent skills and leverage skill sequences of trajectories to reconstruct high-level semantic instructions.Through extensive experiments on language-conditioned robotic navigation and manipulation tasks, encompassing BabyAI, LORel, and Calvin, we demonstrate the superiority of our method over prior works. 
Our approach exhibits enhanced generalization capabilities towards unseen tasks, improved skill interpretability, and notably higher rates of task completion success.", + "authors": "Zhaoxun Ju, Chao Yang, Fuchun Sun, Hongbo Wang, Yu Qiao", + "url": "https://openreview.net/forum?id=8VdptRkRYW", + "pdf": "https://openreview.net//pdf/c767968ff65db9707a3af1849dbe6fe3d107737f.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "we propose an end-to-end imitation learning approach for language conditioned environments with skill learning method.", + "type": "long" + }, + { + "number": 57, + "title": "Decentralized, Decomposition-Based Observation Scheduling for a Large-Scale Satellite Constellation", + "abstract": "Deploying multi-satellite constellations for Earth observation requires coordinating potentially hundreds or thousands of spacecraft. Centralized approaches to observation scheduling rely on a single controller planning the actions of each satellite. With increasing on-board capability for autonomy, we can view the constellation as a multi-agent system (MAS) and employ decentralized scheduling solutions. We formulate the problem as a distributed constraint optimization problem (DCOP) and desire limited inter-agent communication. Due to the scale and structure of the problem, existing DCOP algorithms are inadequate for this application. We develop a scheduling approach that employs a well-coordinated heuristic to decompose the global DCOP into sub-problems as to enable the application of DCOP algorithms. Building on previous work, we present the Neighborhood Stochastic Search (NSS) algorithm, a decentralized algorithm to effectively solve the Earth observing multi-satellite constellation scheduling problem. 
In this paper, we identify the roadblocks of deploying DCOP solvers to a large-scale, real-world problem, propose a decomposition-based scheduling approach that is effective at tackling large scale DCOPs, empirically evaluate the approach against other baselines to demonstrate the effectiveness, and discuss the generality of the approach.", + "authors": "Itai Zilberstein, Ananya Rao, Matthew Salis, Steve Chien", + "url": "https://openreview.net/forum?id=2xNKE2VMeg", + "pdf": "https://openreview.net//pdf/42205497fb3c1eb3adb236732c267f7175dfc8da.pdf", + "primary_keywords": [ + "Applications", + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + } + ] + }, + "Explainability": { + "papers": [ + { + "number": 247, + "title": "Contrastive Explanations of Centralized Multi-agent Optimization Solutions", + "abstract": "In many real-world scenarios, agents are involved in optimization problems. Since most of these scenarios are over-constrained, optimal solutions do not always satisfy all agents. Some agents might be unhappy and ask questions of the form \u201cWhy does solution S not satisfy property P ?\u201d. We propose CMAOE, a domain-independent approach to obtain contrastive explanations by: (i) generating a new solution S\u2032 where property P is enforced, while also minimizing the differences between S and S\u2032; and (ii) highlighting the differences between the two solutions, with respect to the features of the objective function of the multi-agent system. Such explanations aim to help agents understanding why the initial solution is better in the context of the multi-agent system than what they expected. We have carried out a computational evaluation that shows that CMAOE can generate contrastive explanations for large multi-agent optimization problems. 
We\nhave also performed an extensive user study in four different domains that shows that: (i) after being presented with these explanations, humans\u2019 satisfaction with the original solution increases; and (ii) the contrastive explanations generated by CMAOE are preferred or equally preferred by humans over the ones generated by state of the art approaches.",
+                "authors": "Parisa Zehtabi, Alberto Pozanco, Ayala Bolch, Daniel Borrajo, Sarit Kraus",
+                "url": "https://openreview.net/forum?id=vAVqNKSyz4",
+                "pdf": "https://openreview.net//pdf/5369ba4d37c4d5f3f6da82e1e799fffb8c35f485.pdf",
+                "primary_keywords": [
+                    "Human-aware Planning and Scheduling"
+                ],
+                "long": true,
+                "tldr": "Developing an algorithm to generate contrastive explanations for multi-agent optimization solutions.",
+                "type": "long"
+            },
+            {
+                "number": 142,
+                "title": "Explaining Plan Quality Differences",
+                "abstract": "In this paper we describe a method for explaining the differences between the quality of plans produced for similar planning problems. The method exploits a process of abstracting away details of the planning problems until the difference in the quality of the solutions they support has been minimised. We give a general definition of a valid abstraction of a planning problem. We then give the details of the implementation of a number of useful abstractions. 
Finally, we present a depth-bounded breadth-first search algorithm for finding suitable abstractions for explanations; and detail the results of an evaluation of the approach.", + "authors": "Benjamin Lewis Krarup, David E Smith, Derek Long, Amanda Jane Coles", + "url": "https://openreview.net/forum?id=OqoRNL1D8N", + "pdf": "https://openreview.net//pdf/669830bade47629807457d98596981ac301e84bb.pdf", + "primary_keywords": [ + "Human-aware Planning and Scheduling" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 195, + "title": "Explaining the Space of SSP Policies via Policy-Property Dependencies: Complexity, Algorithms, and Relation to Multi-Objective Planning", + "abstract": "Stochastic shortest path (SSP) problems are a common framework for planning under uncertainty. However, the reactive structure of their solution policies is typically not easily comprehensible by an end-user, while planners neither justify the reasons behind their choice of a particular policy over others. To strengthen confidence in the planner's decision-making, recent work in classical planning has introduced a framework for explaining to the user the possible solution space in terms of necessary trade-offs between user-provided plan properties. Here, we extend this framework to SSPs. We introduce a notion of policy properties taking into account action-outcome uncertainty. We analyze formally the computational problem of identifying the exclusion relationships between policy properties, showing that this problem is in fact harder than SSP planning in a complexity theoretical sense. We show that all the relationships can be identified through a series of heuristic searches, which, if ordered in a clever way, yields an anytime algorithm. Further, we introduce an alternative method, which leverages a connection to multi-objective probabilistic planning to move all the computational burden to a pre-process. 
Finally, we explore empirically the feasibility of the proposed explanation methodology on a range of adapted IPPC benchmarks.", + "authors": "Marcel Steinmetz, Sylvie Thiebaux, Daniel H\u00f6ller, Florent Teichteil-K\u00f6nigsbuch", + "url": "https://openreview.net/forum?id=U9rREnksRe", + "pdf": "https://openreview.net//pdf/b3080bfd4bbd730d83103bf8e1082043e2836987.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 86, + "title": "Safe Explicable Planning", + "abstract": "Human expectations stem from their knowledge about the others and the world. Where human-AI interaction is concerned, such knowledge may be inconsistent with the ground truth, resulting in the AI agent not meeting its expectations and degraded team performance. Explicable planning was previously introduced as a novel planning approach to reconciling human expectations and the agent\u2019s optimal behavior for more interpretable decision-making. One critical issue that remains unaddressed is safety in explicable planning since it can lead to explicable behaviors that are unsafe. We propose Safe Explicable Planning (SEP) to extend the prior work to support the specification of a safety bound. The objective of SEP is to search for behaviors that are close to the human\u2019s expectations while satisfying the bound on the agent\u2019s return, the safety criterion chosen in this work. We show that the problem generalizes the consideration of multiple objectives to multiple models and our formulation introduces a Pareto set. Under such a formulation, we propose a novel exact method that returns the Pareto set of safe explicable policies, a more efficient greedy method that returns one of the Pareto optimal policies, and approximate solutions for them based on the aggregation of states to further scalability. Formal proofs are provided to validate the desired theoretical properties of the exact and greedy methods. 
We evaluate our methods both in simulation and with physical robot experiments. Results confirm the validity and efficacy of our methods for safe explicable planning.", + "authors": "Akkamahadevi Hanni, Andrew Boateng, Yu Zhang", + "url": "https://openreview.net/forum?id=NzqoTIhqN3", + "pdf": "https://openreview.net//pdf/b9de01d4be50fbd49e69c4d041c64a3fbc0ef4b4.pdf", + "primary_keywords": [ + "Human-aware Planning and Scheduling" + ], + "long": true, + "tldr": "This paper introduces a planning formulation to find a human expected policy such that the safety bounds in the agent's model are satisfied.", + "type": "long" + } + ] + }, + "Temporal Planning": { + "papers": [ + { + "number": 66, + "title": "Converting Simple Temporal Networks with Uncertainty into Minimal Equivalent Dispatchable Form", + "abstract": "A Simple Temporal Network with Uncertainty (STNU) is a structure\nfor representing and reasoning about time constraints on actions that\nmay have uncertain durations. \nAn STNU is dynamically controllable (DC) if there exists a dynamic strategy \nfor executing the network that guarantees that all of its constraints will be satisfied \nno matter how the uncertain durations turn out---within their specified bounds. \nHowever, such strategies typically require exponential space. \nTherefore, converting a DC STNU into a so-called dispatchable form \nfor practical applications is essential. \nThe relevant portions of a real-time execution strategy for a dispatchable\nSTNU can be incrementally constructed during execution, requiring only $O(n^2)$ space,\nwhile also providing maximum flexibility and minimal computation during the execution\nof the network. \nAlthough existing algorithms can generate equivalent-dispatchable STNUs,\nthey do not guarantee a minimal number of edges in the STNU graph. 
\nSince the number of edges directly impacts the computations during execution, this paper\npresents a novel algorithm for converting any dispatchable STNU into \nan equivalent dispatchable network having a minimal number of edges. \nThe complexity of the algorithm is $O(kn^3)$, where $k$ is the number of actions with uncertain durations, \nand $n$ is the number of timepoints in the network. \nThe paper also provides an empirical evaluation of the reduction in the number of edges achieved by the new algorithm.",
+                "authors": "Luke Hunsberger, Roberto Posenato",
+                "url": "https://openreview.net/forum?id=g2ljsE4QrJ",
+                "pdf": "https://openreview.net//pdf/b5b04c9d4d5ea7d62272c41995fc48d99448e735.pdf",
+                "primary_keywords": [
+                    "Temporal Planning"
+                ],
+                "long": true,
+                "tldr": "A new polynomial-time algorithm to convert any dispatchable STNU into an equivalent dispatchable network having a minimal number of edges.",
+                "type": "long"
+            },
+            {
+                "number": 291,
+                "title": "Multi-Agent Temporal Task Solving and Plan Optimization",
+                "abstract": "Several multi-agent techniques are utilized to reduce the complexity of classical planning tasks, however, their applicability to temporal planning domains is a currently open line of study in the field of Automated Planning.\n\nIn this paper, we present MA-LAMA, a centralized, unthreaded, satisfying, total-order, multi-agent temporal planner, that exploits the 'multi-agent nature' of temporal domains to, as its predecessor LAMA, perform plan optimization.\n\nIn MA-LAMA, temporal tasks are translated to the constrained snap-actions paradigm, and an automatic agent decomposition, goal assignment and required cooperation analysis are carried out to build independent search steps, called Search Phases. 
These Search Phases are then solved by consecutive agent local searches, using classical heuristics and temporal constraints.\n\nExperimentation shows that MA-LAMA is able to solve a wide range of classical and temporal multi-agent domains, performing significantly better in plan quality than other state-of-the-art temporal planners.", + "authors": "Javier Caballero Test\u00f3n, MariaD. R-Moreno", + "url": "https://openreview.net/forum?id=sPSw73rhQB", + "pdf": "https://openreview.net//pdf/57ee0a57038d93de762712ed95cccc33b547778b.pdf", + "primary_keywords": [ + "Temporal Planning", + "Multi-Agent Planning" + ], + "long": true, + "tldr": "We present MA-LAMA, a centralized, unthreaded, satisfying, total-order, multi-agent temporal planner with task decomposition and required cooperation techniques", + "type": "long" + }, + { + "number": 119, + "title": "Optimal Infinite Temporal Planning: Cyclic Plans for Priced Timed Automata", + "abstract": "Many applications require infinite plans ---i.e. an infinite sequence of actions--- in order to carry out some given process indefinitely. In addition, it is desirable to guarantee optimality. In this paper, we address this problem in the setting of doubly-priced timed automata, where we show how to efficiently compute ratio-optimal cycles for optimal infinite plans. For efficient computation, we present symbolic $\\lambda$-deduction (S-$\\lambda$D), an any-time algorithm that uses a symbolic representation (priced zones) to search the state-space with a compact representation of the time constraints. Our approach guarantees termination while arriving at an optimal solution. 
Our experimental evaluation shows that S-$\\lambda$D outperforms the alternative of searching in the concrete state space, is very robust with respect to fine-grained temporal constraints, and has a very good anytime behaviour.", + "authors": "Rasmus Gr\u00f8nkj\u00e6r Tollund, Nicklas Slorup Johansen, Kristian \u00d8dum Nielsen, Alvaro Torralba, Kim Guldstrand Larsen", + "url": "https://openreview.net/forum?id=XN2jy5pLnf", + "pdf": "https://openreview.net//pdf/eafe93999e4515747b13d58df821088c989b1678.pdf", + "primary_keywords": [ + "Temporal Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 9, + "title": "Planning and Acting While the Clock Ticks", + "abstract": "Standard temporal planning assumes that planning takes place offline, and then execution starts at time 0. Recently, situated temporal planning was introduced, where planning starts at time 0, and execution occurs after planning terminates. Situated temporal planning reflects a more realistic scenario where planning is online. However, in situated temporal planning a complete plan must be generated before any action is executed. In some problems with extreme time pressure, timing is too tight to complete planning before the first action must be executed. For example, an autonomous car which has a truck backing towards it should probably move out of the way now, and plan how to get to its destination later.\nIn this paper, we propose a new problem, called concurrent planning and execution, in which actions can be dispatched (executed) before planning terminates. Unlike previous settings, we must handle wall clock deadlines that affect action applicability and goal achievement (as in situated planning) while also supporting dispatching actions before a complete plan has been found. We extend previous work on metareasoning for situated temporal planning to develop an algorithm for this new setting. 
Our empirical evaluation shows that when there is extreme time pressure, our approach outperforms situated temporal planning.", + "authors": "Andrew Coles, Erez Karpas, Andrey Lavrinenko, Wheeler Ruml, Solomon Eyal Shimony, Shahaf S. Shperberg", + "url": "https://openreview.net/forum?id=XmyjoEAi7q", + "pdf": "https://openreview.net//pdf/ee275f1f7f107d9bf51620bc2d6add05cbe981d1.pdf", + "primary_keywords": [ + "Temporal Planning" + ], + "long": true, + "tldr": "We propose a new problem, called concurrent planning and execution, in which actions can be dispatched (executed) before planning terminates, and propose an algorithm for solving it.", + "type": "long" + }, + { + "number": 287, + "title": "Progressive State Space Disaggregation for Infinite Horizon Dynamic Programming", + "abstract": "High dimensionality of model-based Reinforcement Learning and Markov Decision Processes can be reduced using abstractions of the state and action spaces. Although hierarchical learning and state abstraction methods have been explored over the past decades, explicit methods to build useful abstractions of models are rarely provided. In this work, we provide a new state abstraction method for solving infinite horizon problems in the discounted and total settings. Our approach is to progressively disaggregate abstract regions by iteratively slicing aggregations of states relatively to a value function. The distinguishing feature of our method, in contrast to previous approximations of the Bellman operator, is the disaggregation of regions during value function iterations (or policy evaluation steps). The objective is to find a more efficient aggregation that reduces the error on each piece of the partition. We provide a proof of convergence for this algorithm without making any assumptions about the structure of the problem. We also show that this process decreases the computational complexity of the Bellman operator iteration and provides useful abstractions. 
We then plug this state space disaggregation process into classical Dynamic Programming algorithms, namely Approximate Value Iteration, Q-Value Iteration and Policy Iteration. Finally, we conduct a numerical comparison on randomly generated MDPs as well as classical MDPs. Those experiments show that our policy-based algorithm is faster than both the traditional dynamic programming approach and recent aggregative methods that use a fixed number of adaptive partitions.",
+                "authors": "Orso Forghieri, Erwan Le Pennec, Hind Castel, Emmanuel Hyon",
+                "url": "https://openreview.net/forum?id=kEaoy85QFP",
+                "pdf": "https://openreview.net//pdf/232023b6ef05ac588f818b7a5e038e239b927400.pdf",
+                "primary_keywords": [
+                    "Theory",
+                    "Temporal Planning"
+                ],
+                "long": true,
+                "tldr": "We provide an algorithm that progressively disaggregates the state space of an MDP to approximate the optimal value function with guarantees and no assumptions on the MDP.",
+                "type": "long"
+            },
+            {
+                "number": 10,
+                "title": "On Verifying Linear Execution Strategies in Planning Against Nature",
+                "abstract": "While planning and acting in environments in which nature can trigger non-deterministic events, the agent has to consider that the state of the environment might change without its consent. Practically, it means that the agent has to make sure that it eventually achieves its goal (if possible) despite the acts of nature.\nIn this paper, we first formalize the semantics of such problems in Alternating-time Temporal Logic, which allows us to prove some theoretical properties of different types of solutions. Then, we focus on linear execution strategies, which resemble classical plans in that they follow a fixed sequence of actions. We show that any problem that can be solved by a linear execution strategy can be solved by a particular form of linear execution strategy which assigns wait-for preconditions to each action in the plan, that specifies when to execute that action. 
Then, we propose a sound algorithm that verifies a sequence of actions and assigns wait-for preconditions to them by leveraging abstraction.", + "authors": "Lukas Chrpa, Erez Karpas", + "url": "https://openreview.net/forum?id=LHEJ7mxUHK", + "pdf": "https://openreview.net//pdf/f5b7beef82eaa490aba551a95c8952a950a918a6.pdf", + "primary_keywords": "", + "long": true, + "tldr": "We address problems of planning against nature, and propose algorithms for synthesizing linear execution strategies.", + "type": "long" + } + ] + }, + "Hybrid and Numeric Planning": { + "papers": [ + { + "number": 227, + "title": "Learning Generalised Policies for Numeric Planning", + "abstract": "We extend Action Schema Networks (ASNets) to learn generalised policies for numeric planning, which features quantitative numeric state variables, preconditions and effects. We propose a neural network architecture that can reason about the numeric variables both directly and in context of other variables. We also develop a dynamic exploration algorithm for more efficient training, by better balancing the exploration versus learning tradeoff to account for the greater computational demand of numeric teacher planners. 
Experimentally, we find that the learned generalised policies are capable of outperforming traditional numeric planners on some domains, and the dynamic exploration algorithm to be on average much faster at learning effective generalised policies than the original ASNets training algorithm.", + "authors": "Ryan Xiao Wang, Sylvie Thiebaux", + "url": "https://openreview.net/forum?id=4jxPNxw8tM", + "pdf": "https://openreview.net//pdf/e65aef6d6f5bc7222fc61b7bc350b8f8ca7e7d41.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "We propose a neural network architecture for learning generalised policies for numeric planning.", + "type": "long" + }, + { + "number": 146, + "title": "PDDL+ Models for Deployable yet Effective Traffic Signal Optimisation", + "abstract": "The use of planning techniques in traffic signal optimisation has proven effective in managing unexpected traffic conditions as well as typical traffic patterns. However, significant challenges concerning the deployability of generated signal strategies remain, as existing approaches tend not to consider constraints and features of the actual real-world infrastructure on which they will be implemented. \n\nTo address this challenge, we introduce a range of PDDL+ models embodying technological requirements as well as insights from domain experts. The proposed models have been extensively tested on historical data using a range of well-known search strategies and heuristics, as well as alternative encodings. 
Results demonstrate their competitiveness with the state of the art.", + "authors": "Anas El Kouaiti, Francesco Percassi, Alessandro Saetti, Thomas Leo McCluskey, Mauro Vallati", + "url": "https://openreview.net/forum?id=kdXB1NXiy7", + "pdf": "https://openreview.net//pdf/eb143e7b5f8b1d92c00894cda67bde3b527b0a93.pdf", + "primary_keywords": [ + "Applications" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 156, + "title": "Taming Discretised PDDL+ through Multiple Discretisations", + "abstract": "The PDDL+ formalism allows the use of planning techniques in applications that require the ability to perform hybrid discrete-continuous reasoning. PDDL+ problems are notoriously challenging to tackle, and to reason upon them a well-established approach is discretisation. Existing systems rely on a single discretisation delta or, at most, two: a simulation delta to model the dynamics of the environment, and a planning delta, that is used to specify when decisions can be taken. However, there exist cases where this rigid schema is not ideal, for instance when agents with very different speeds need to cooperate or interact in a shared environment, and a more flexible approach that can accommodate more deltas is necessary. To address the needs of this class of hybrid planning problems, in this paper we introduce a reformulation approach that allows the encapsulation of different levels of discretisation in PDDL+ models, hence allowing any domain-independent planning engine to reap the benefits. 
Further, we provide the community with a new set of benchmarks that highlights the limits of fixed discretisation.", + "authors": "Matteo Cardellini, Marco Maratea, Francesco Percassi, Enrico Scala, Mauro Vallati", + "url": "https://openreview.net/forum?id=uGPxADY13f", + "pdf": "https://openreview.net//pdf/4dabb535930b1f7383f9feea5f2c0afb44730cb6.pdf", + "primary_keywords": [ + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "Tackling PDDL+ problems is challenging and discretization is a common way to solve it. This paper proposes a reformulation approach allowing flexible encapsulation of multiple and dynamic discretization steps directly in PDDL+ models.", + "type": "long" + }, + { + "number": 326, + "title": "JaxPlan and GurobiPlan: Optimization Baselines for Replanning in Discrete and Mixed Discrete and Continuous Probabilistic Domains", + "abstract": "Replanning methods that determinize a stochastic planning problem and replan at each action step have long been known to provide strong baseline (and even competition winning) solutions to discrete probabilistic planning problems. Recent work has explored the extension of replanning methods to the case of mixed discrete and continuous probabilistic domains by leveraging MILP compilations of the RDDL specification language. Other recent advances in probabilistic planning have explored the compilation of structured mixed discrete and continuous RDDL domains into a determinized computation graph that also lends itself to replanning via so-called planning by backpropagation methods. However, to date, there has not been any comprehensive comparison of these recent optimization-based replanning methodologies to the state-of-the-art winner of the discrete probabilistic IPC 2011 and 2014 and runner-up in 2018 (PROST) and the winner of the mixed discrete-continuous probabilistic IPC 2023 (DiSProd). 
In this paper, we provide JaxPlan that has several extensive upgrades to both planning by backpropagation and its compact tensorized compilation from RDDL to a Jax computation graph with discrete relaxations and a sample average approximation. We also provide the first detailed overview of a compilation of the RDDL language specification to Gurobi's Mixed Integer Nonlinear Programming (MINLP) solver that we term GurobiPlan. We provide a comprehensive comparative analysis of JaxPlan and GurobiPlan with competition winning planners on 19 domains and a total of 155 instances to assess their performance across (a) different domains, (b) different instance sizes, and (c) different time budgets. We also release all code to reproduce the results along with the open-source planners we describe in this work.", + "authors": "Michael Gimelfarb, Ayal Taitler, Scott Sanner", + "url": "https://openreview.net/forum?id=7IKtmUpLEH", + "pdf": "https://openreview.net//pdf/f68de0ba3b0e6f40ecdc40ca138aef72d5a918ce.pdf", + "primary_keywords": "", + "long": true, + "tldr": "We enhance existing probabilistic planning tools, namely planning-by-backpropagation and mixed-integer programming, to mixed discrete-continuous MDPs with nonlinear dynamics, and evaluate their performance against winners on IPC 2011/14/23.", + "type": "long" + } + ] + }, + "Applications": { + "papers": [ + { + "number": 232, + "title": "Multi-Objective Electric Vehicle Route and Charging Planning with Contraction Hierarchies", + "abstract": "Electric vehicle (EV) travel planning is a complex task that involves planning the routes and the charging sessions for EVs while optimizing travel duration and cost. We show the applicability of the multi-objective EV travel planning algorithm with practically usable solution times on country-sized road graphs with a large number of charging stations and a realistic EV model. 
The approach is based on multi-objective A* search enhanced by Contraction hierarchies, optimal dimensionality reduction, and sub-optimal $\\epsilon$-relaxation techniques. We performed an extensive empirical evaluation on 182\\,000 problem instances showing the impact of various algorithm settings on real-world map of Bavaria and Germany with more than 12\\,000 charging stations. The results show the proposed approach is the first one capable of performing such a genuine multi-objective optimization on realistically large country-scale problem instances that can achieve practically usable planning times in order of seconds with only a minor loss of solution quality. \nThe achieved speed-up varies from $\\sim11\\times$ for optimal solution to more than $250\\times$ for sub-optimal solution compared to vanilla multi-objective A*.", + "authors": "Marek Cuch\u00fd, Ji\u0159\u00ed Vok\u0159\u00ednek", + "url": "https://openreview.net/forum?id=xFKq0X1dHt", + "pdf": "https://openreview.net//pdf/a7b650ecf53701313ac1ae7100de8c42a2b8f924.pdf", + "primary_keywords": [ + "Applications" + ], + "long": true, + "tldr": "Practically usable multi-objective EV route and charging planning algorithm on realistic instances", + "type": "long" + }, + { + "number": 151, + "title": "Neural Combinatorial Optimization on Heterogeneous Graphs. An Application to the Picker Routing Problem in Mixed-shelves Warehouses", + "abstract": "In recent years, machine learning (ML) models capable of solving combinatorial optimization (CO) problems have received a surge of attention. While early approaches failed to outperform traditional CO solvers, the gap between handcrafted and learned heuristics has been steadily closing. However, most work in this area has focused on simple CO problems to benchmark new models and algorithms, leaving a gap in the development of methods specifically designed to handle more involved problems. 
Therefore, this work considers the problem of picker routing in the context of mixed-shelves warehouses, which involves not only a heterogeneous graph representation, but also a combinatorial action space resulting from the integrated selection and routing decisions to be made. We propose both a novel encoder to effectively learn representations of the heterogeneous graph and a hierarchical decoding scheme that exploits the combinatorial structure of the action space. The efficacy of the developed methods is demonstrated through a comprehensive comparison with established architectures as well as exact and heuristic solvers.", + "authors": "Laurin Luttmann, Lin Xie", + "url": "https://openreview.net/forum?id=BL0DDUfSzk", + "pdf": "https://openreview.net//pdf/57cbc4aeefae466f8cbae0d014800cc0d9ebc226.pdf", + "primary_keywords": [ + "Learning" + ], + "long": true, + "tldr": "We present a novel encoder-decoder architecture to solve complex combinatorial optimization problems and apply it to the mixed-shelves picker routing problem.", + "type": "long" + }, + { + "number": 140, + "title": "Replanning in Advance for Instant Delay Recovery in Multi-Agent Applications: Rerouting Trains in a Railway Hub", + "abstract": "Train routing is sensitive to delays that occur in the network. When a train is delayed, it is imperative that a new plan be found quickly, or else other trains may need to be stopped to ensure safety, potentially causing cascading delays. In this paper, we consider this class of multi-agent planning problems, which we call Multi-Agent Execution Delay Replanning. We show that these can be solved by reducing the problem to an any-start-time safe interval planning problem. When an agent has an any-start-time plan, it can react to a delay by simply looking up the precomputed plan for the delayed start time. 
We identify crucial real-world problem characteristics like the agent's speed, size, and safety envelope, and extend the any-start-time planning to account for them. Experimental results on real-world train networks show that any-start-time plans are compact and can be computed in reasonable time while enabling agents to instantly recover a safe plan.", + "authors": "Issa Hanou, Devin Wild Thomas, Wheeler Ruml, Mathijs de Weerdt", + "url": "https://openreview.net/forum?id=60cIgWnS57", + "pdf": "https://openreview.net//pdf/ee62623599b0c3d86d28f7f770109bbeda95d8b0.pdf", + "primary_keywords": [ + "Applications" + ], + "long": true, + "tldr": "Multi-agent delay replanning problems such as train rerouting can be solved by precomputing any-start-time plans to instantly recover a safe plan.", + "type": "long" + }, + { + "number": 336, + "title": "The Story So Far on Narrative Planning", + "abstract": "Narrative planning is the use of automated planning to construct, communicate, and understand stories, a form of information to which human cognition and enaction is pre-disposed. We review the narrative planning problem in a manner suitable as an introduction to the area, survey different plan-based methodologies and affordances for reasoning about narrative, and discuss open challenges relevant to the broader AI community.", + "authors": "Rogelio E. Cardona-Rivera, Arnav Jhala, Julie Porteous, R. 
Michael Young", + "url": "https://openreview.net/forum?id=xZkyYuoJI0", + "pdf": "https://openreview.net//pdf/1b5ff2d2f66fc3d2d42a9c6346c4d852d777abfa.pdf", + "primary_keywords": [ + "Applications" + ], + "long": true, + "tldr": "We review the application area of narrative planning, offer a survey of the topic that distills general themes while remaining grounded in technical language, and discuss open challenges relevant to the broader AI community.", + "type": "long" + } + ] + }, + "Knowledge Engineering": { + "papers": [ + { + "number": 148, + "title": "Decoupled Search for the Masses: A Novel Task Transformation for Classical Planning", + "abstract": "Automated problem reformulation is a common technique in classical planning to identify and exploit problem structures. Decoupled search is an approach that automatically decomposes planning tasks based on their causal structure, often significantly reducing the search effort. However, its broad applicability is limited by the need for specialized algorithms. In this paper, we present an approach that embodies decoupled search for non-optimal planning through a novel task transformation. Specifically, given a task and a decomposition, we create a transformed task such that the state space of the transformed task is isomorphic to that of decoupled search on the original task. This eliminates the need for specialized algorithms and allows the use of various planning technology in the decoupled-search framework. 
Empirical evaluation shows that our method is empirically competitive with specialized decoupled algorithms and favorable to other related problem reformulation techniques.", + "authors": "David Speck, Daniel Gnad", + "url": "https://openreview.net/forum?id=sqzyJpjsha", + "pdf": "https://openreview.net//pdf/21b7dc9f706f9275a482482b7492442e4f78da45.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 305, + "title": "On Policy Reuse: An Expressive Language for Representing and Executing General Policies that Call Other Policies", + "abstract": "Recently, a simple but powerful language for expressing and learning general policies and problem decompositions (sketches) has been introduced, which is based on collections of rules defined on a set of Boolean and numerical features. In this work, we consider three extensions of this basic language aimed at making policies and sketches more flexible and reusable: internal memory states, as in finite state controllers; indexical features, whose values are a function of the state and a number of internal registers that can be loaded with objects; and modules that wrap up policies and sketches and allow them to call each other by passing parameters. In addition, unlike general policies that select actions indirectly by selecting state transitions, the new language allows for the selection of ground actions directly. The expressive power of the resulting language for recombining policies and sketches is illustrated through a number of examples. 
The problem of learning policies and sketches in the new language, from the bottom up, is left for future work.", + "authors": "Blai Bonet, Dominik Drexler, Hector Geffner", + "url": "https://openreview.net/forum?id=TSl0tWPiXT", + "pdf": "https://openreview.net//pdf/80bcc9475efc36abf6c5b79bf52df17b3d05fc89.pdf", + "primary_keywords": [ + "Theory", + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 376, + "title": "Planning Domain Simulation: An Interactive System for Plan Visualisation", + "abstract": "Representing and manipulating domain knowledge is essential for developing systems that can visualize plans. This paper presents a novel plan visualisation system called Planning Domain Simulation (PDSim) that employs knowledge representation and manipulation techniques to support the plan visualization process. PDSim can use PDDL or the Unified Planning Library Python representation as the underlying language for modelling planning problems and provides an interface for users to manipulate this representation through interaction with the Unity game engine and a set of planners. The system\u2019s features include visualising plan components, and their relationships, identifying plan conflicts, and examples applied to real-world problems. A user evaluation has been conducted to compare PDSim against the standard way using text editors and planners and to evaluate the perceived usefulness and ease of use of PDSim as an additional tool used by students for knowledge representation modelling and automated planning. 
The benefits and limitations of PDSim are also discussed, highlighting future research directions in the area.", + "authors": "Emanuele De Pellegrin, Ron Petrick", + "url": "https://openreview.net/forum?id=kNo4YkIotq", + "pdf": "https://openreview.net//pdf/8ac93fb907a9bb12b236e613db79f53660ae6642.pdf", + "primary_keywords": [ + "Applications", + "Knowledge Representation/Engineering" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 80, + "title": "Planning with Object Creation", + "abstract": "Classical planning problems are defined using some specification language, such as PDDL. The domain expert defines action schemas, objects, the initial state, and the goal. One key aspect of PDDL is that the set of objects cannot be modified during plan execution. While this is fine in many domains, sometimes it makes the modeling much more complicated. This not only impacts the performance of the planners, but it also requires the domain expert to bound the number of required objects beforehand, which might be an intractable problem by itself. Here, we introduce an extension to the classical planning formalism, where action effects can create and remove objects. This problem is semi-decidable, but it becomes decidable if we can bound the number of objects at any given state, even though the state-space is still infinite. On the practical side, we extend the Powerlifted planning system to support this PDDL extension. Our results show that Powerlifted does not lose efficiency by supporting this extension while allowing for easier PDDL models.", + "authors": "Augusto B. 
Corr\u00eaa, Giuseppe De Giacomo, Malte Helmert, Sasha Rubin", + "url": "https://openreview.net/forum?id=e7T3ksgYfP", + "pdf": "https://openreview.net//pdf/748524abf9777dee5f5948317164fd1cd851784a.pdf", + "primary_keywords": "", + "long": true, + "tldr": "We introduce an extension to classical planning where action effects can create and delete objects of the task.", + "type": "long" + } + ] + }, + "Search and Uncertainty": { + "papers": [ + { + "number": 172, + "title": "A Fast Algorithm for k-Memory Messaging Scheme Design in Dynamic Environments with Uncertainty", + "abstract": "We study the problem of designing the optimal k-memory messaging scheme in a dynamic environment. Specifically, a sender, who can perfectly observe the state of a dynamic environment but cannot take actions, aims to persuade an uninformed, far-sighted receiver to take actions to maximize the long-term utility of the sender, by sending messages. After receiving a message, the self-interested receiver derives a posterior belief and takes an action. The immediate reward of each player can be unaligned, thus the sender needs to ensure persuasiveness when designing the messaging scheme.\n\nWe formulate this problem as a bi-linear program and show that there exist infinitely many non-trivial persuasive messaging schemes for any problem instance. Moreover, we show that when the sender uses a k-memory messaging scheme, the optimal strategy for the receiver is also a k-memory strategy. We propose a fast heuristic algorithm for this problem and show that it can be extended to the setting where the sender has threat ability. We experimentally evaluate our algorithm, comparing it with the solution obtained by the Gurobi solver, in terms of performance and running time. 
Extensive experimental results show that our algorithm outperforms the solution in terms of running time, yet achieves comparable performance.", + "authors": "Zhikang Fan, Weiran Shen", + "url": "https://openreview.net/forum?id=S92C7ywZAQ", + "pdf": "https://openreview.net//pdf/736f974185ad3db00dca18e48d152a2eae5c0f3d.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 264, + "title": "Bounded Suboptimal Weight-Constrained Shortest-Path Search via Efficient Representation of Paths", + "abstract": "In the Weight-Constrained Shortest-Path (WCSP) problem, given a graph in which each edge is annotated with a cost and a weight, a start state, and a goal state, the task is to compute a minimum-cost path from the start state to the goal state with weight no larger than a specified weight limit. While most existing works have focused on solving the WCSP problem optimally, many real-world situations admit a trade-off between efficiency and a suboptimality bound for the path cost. In this paper, we propose a novel bounded suboptimal WCSP algorithm called WC-A\\*pex that is built on a state-of-the-art approximate bi-objective search algorithm called A\\*pex. WC-A\\*pex uses an efficient, albeit approximate, representation of paths with similar costs and weights to compute a (1 + \u03b5)-suboptimal path, for a user-specified \u03b5. During search, WC-A\\*pex avoids storing all paths explicitly and thereby reduces the search effort while still retaining its (1 + \u03b5)-suboptimality property. On benchmark instances that model road networks, our experimental results show that WC-A*pex with \u03b5 = 0.01 (i.e., with 1% suboptimality) achieves an order-of-magnitude speed-up over WC-A\\*, a state-of-the-art WCSP algorithm, and its bounded suboptimal variant.", + "authors": "Han Zhang, Oren Salzman, Ariel Felner, T. K. 
Satish Kumar, Sven Koenig", + "url": "https://openreview.net/forum?id=Ed1mt5Yjbh", + "pdf": "https://openreview.net//pdf/91b9a2cfd388ab376d7603d6cbad1ac0138e0dfe.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 218, + "title": "Computing Planning Centroids and Minimum Covering States using Symbolic Bidirectional Search", + "abstract": "In some scenarios, planning agents might be interested in reaching states that keep certain relationships with respect to a set of goals. Recently, two of these types of relationships were proposed: centroids, which minimize the average distance to the goals; and minimum covering states, which minimize the maximum distance to the goals. Existing approaches compute these states by searching forward either in the original or a reformulated task. In this paper, we propose several algorithms that use symbolic bidirectional search to efficiently compute centroids and minimum covering states. Experimental results in both existing and novel benchmarks show that our algorithms scale much better than previous approaches, establishing a new state of the art technique for this problem.", + "authors": "Alberto Pozanco, Alvaro Torralba, Daniel Borrajo", + "url": "https://openreview.net/forum?id=8SwyMcLcQh", + "pdf": "https://openreview.net//pdf/a5700a1f4eeadfe838d7af6bceb58b6ddcba8c1d.pdf", + "primary_keywords": "", + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 89, + "title": "Exact Multi-objective Path Finding with Negative Weights and Negative Cycles", + "abstract": "The point-to-point Multi-objective Shortest Path (MOSP) problem is a classic yet challenging task that involves finding all Pareto-optimal paths between two points in a graph with multiple edge costs. Recent studies have shown that employing A* search can lead to state-of-the-art performance in solving point-to-point MOSP instances with non-negative costs. 
In this paper, we propose a novel A*-based multi-objective search framework that not only handles graphs with negative costs and even negative cycles but also incorporates multiple speed-up techniques to enhance the efficiency of exhaustive search with A*. Through extensive experiments on large realistic test cases, our algorithm demonstrates remarkable success in solving difficult MOSP instances, outperforming the state of the art by up to an order of magnitude.", + "authors": "Saman Ahmadi, Daniel Harabor, Nathan R. Sturtevant, Mahdi Jalili", + "url": "https://openreview.net/forum?id=sBHklMlq0c", + "pdf": "https://openreview.net//pdf/85f1d122187518945b84f1fa1f0e52f6afdf7e82.pdf", + "primary_keywords": [ + "Applications", + "Theory" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 174, + "title": "Lookahead Pathology in Monte-Carlo Tree Search", + "abstract": "Monte-Carlo Tree Search (MCTS) is an adversarial search paradigm that first found prominence with its success in the domain of computer Go. Early theoretical work established the game-theoretic soundness and convergence bounds for Upper Confidence bounds applied to Trees (UCT), the most popular instantiation of MCTS; however, there remain notable gaps in our understanding of how UCT behaves in practice. In this work, we address one such gap by considering the question of whether UCT can exhibit lookahead pathology --- a paradoxical phenomenon first observed in Minimax search where greater search effort leads to worse decision-making. We introduce a novel family of synthetic games that offer rich modeling possibilities while remaining amenable to mathematical analysis. Our theoretical and experimental results suggest that UCT is indeed susceptible to pathological behavior in a range of games drawn from this family.", + "authors": "Khoi P. N. 
Nguyen, Raghuram Ramanujan", + "url": "https://openreview.net/forum?id=sNEl4S31xe", + "pdf": "https://openreview.net//pdf/42a4c37ef7f50cfe0ec3f596f6ca02a604ddf282.pdf", + "primary_keywords": [ + "Multi-Agent Planning" + ], + "long": true, + "tldr": "", + "type": "long" + }, + { + "number": 220, + "title": "New Fuzzing Biases for Action Policy Testing", + "abstract": "Testing was recently proposed as a method to gain trust in learned\naction policies in classical planning. Test cases in this setting are\nstates generated by a fuzzing process that performs random walks from\nthe initial state. A fuzzing bias attempts to bias these random walks\ntowards policy bugs, that is, states where the policy performs\nsub-optimally. Prior work explored a simple fuzzing bias based on\npolicy-trace cost. Here, we investigate this topic more deeply. We\nintroduce three new fuzzing biases based on analyses of policy-trace\nshape, estimating whether a trace is close to looping back on\nitself, whether it contains detours, and whether its goal-distance\nsurface does not smoothly decline. 
Our experiments with two kinds of\nneural action policies show that these new biases improve bug-finding\ncapabilities in many cases.", + "authors": "Jan Eisenhut, Xandra Schuler, Daniel Fi\u0161er, Daniel H\u00f6ller, Maria Christakis, J\u00f6rg Hoffmann", + "url": "https://openreview.net/forum?id=FtvLItoAbL", + "pdf": "https://openreview.net//pdf/192ee85105686b2ecb2e99d8b831a6b36d955013.pdf", + "primary_keywords": "", + "long": false, + "tldr": "", + "type": "short" + } + ] + } +} \ No newline at end of file diff --git a/data/schedule/sessions.json b/data/schedule/sessions.json new file mode 100644 index 0000000..43905b1 --- /dev/null +++ b/data/schedule/sessions.json @@ -0,0 +1,188 @@ +[ + { + "date": "2024-06-04", + "name": "Tuesday, June 4, 2024", + "slots": [ + { + "label": "slot-1-1", + "name": "Session 1", + "duration": 90, + "start": "10:00", + "end": "11:30", + "sessions": [ + { + "name": "RL", + "label": "sess-rl", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "Heuristics", + "label": "sess-heur", + "location": "TBD", + "chair": "TBD" + } + ] + }, + { + "label": "slot-1-2", + "name": "Session 2", + "duration": 90, + "start": "13:00", + "end": "14:30", + "sessions": [ + { + "name": "LLMs and Neural Action Policies", + "label": "sess-llm", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "Theory", + "label": "sess-theory", + "location": "TBD", + "chair": "TBD" + } + ] + }, + { + "label": "slot-1-3", + "name": "Session 3", + "duration": 90, + "start": "15:00", + "end": "16:30", + "sessions": [ + { + "name": "Planning and Learning", + "label": "sess-pal", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "Temporal Planning", + "label": "sess-temporal", + "location": "TBD", + "chair": "TBD" + } + ] + } + ] + }, + { + "date": "2024-06-05", + "name": "Wednesday, June 5, 2024", + "slots": [ + { + "label": "slot-2-1", + "name": "Session 1", + "duration": 90, + "start": "10:00", + "end": "11:30", + "sessions": [ + { + "name": "Search and 
Uncertainty", + "label": "sess-search", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "MAPF", + "label": "sess-mapf", + "location": "TBD", + "chair": "TBD" + } + ] + }, + { + "label": "slot-2-2", + "name": "Session 2", + "duration": 90, + "start": "13:00", + "end": "14:30", + "sessions": [ + { + "name": "Planning Under Uncertainty", + "label": "sess-puu", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "Robotics", + "label": "sess-robotics", + "location": "TBD", + "chair": "TBD" + } + ] + }, + { + "label": "slot-2-3", + "name": "Session 3", + "duration": 60, + "start": "15:00", + "end": "16:00", + "sessions": [ + { + "name": "Robots and Space", + "label": "sess-space", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "Explainability", + "label": "sess-explain", + "location": "TBD", + "chair": "TBD" + } + ] + } + ] + }, + { + "date": "2024-06-06", + "name": "Thursday, June 6, 2024", + "slots": [ + { + "label": "slot-3-1", + "name": "Session 1", + "duration": 60, + "start": "10:00", + "end": "11:00", + "sessions": [ + { + "name": "Knowledge Engineering", + "label": "sess-ke", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "Scheduling", + "label": "sess-scheduling", + "location": "TBD", + "chair": "TBD" + } + ] + }, + { + "label": "slot-3-2", + "name": "Session 2", + "duration": 60, + "start": "11:00", + "end": "12:00", + "sessions": [ + { + "name": "Applications", + "label": "sess-applications", + "location": "TBD", + "chair": "TBD" + }, + { + "name": "Hybrid and Numeric Planning", + "label": "sess-hybrid", + "location": "TBD", + "chair": "TBD" + } + ] + } + ] + } +] \ No newline at end of file diff --git a/layouts/shortcodes/detailed_schedule_json.html b/layouts/shortcodes/detailed_schedule_json.html new file mode 100644 index 0000000..5b39937 --- /dev/null +++ b/layouts/shortcodes/detailed_schedule_json.html @@ -0,0 +1,318 @@ +{{ $file := .Get 0 }} +{{ $sessions := getJSON $file }} +{{ $file := .Get 1 }} +{{ $papers := getJSON 
$file }} + + + + + + + +
+ +{{ print $paper.authors }}
++ {{ if eq $paper.type "long" }} + Long Paper (15min) + {{ else if eq $paper.type "short" }} + Short Paper (10min) + {{ else }} + Previously Published Paper (10min) + {{ end }} +