diff --git a/_bibliography/ASL_Bib.bib b/_bibliography/ASL_Bib.bib index 380fd083..9a23700d 100644 --- a/_bibliography/ASL_Bib.bib +++ b/_bibliography/ASL_Bib.bib @@ -5475,30 +5475,31 @@ @phdthesis{Allen2016 } @inproceedings{AgiaVilaEtAl2024, - author = {Agia, C. and Vila, {G. C.} and Bandyopadhyay, S. and Bayard, {D. S.} and Cheung, K. and Lee, {C. H.} and Wood, E. and Aenishanslin, I. and Ardito, S. and Fesq, L. and Pavone, M. and Nesnas, {I. A. D.}}, - title = {Modeling Considerations for Developing Deep Space Autonomous Spacecraft and Simulators}, - booktitle = proc_IEEE_AC, - year = {2024}, - asl_abstract = {To extend the limited scope of autonomy used in prior missions for operation in distant and complex environments, there is a need to further develop and mature autonomy that jointly reasons over multiple subsystems, which we term system-level autonomy. System-level autonomy establishes situational awareness that resolves conflicting information across subsystems, which may necessitate the refinement and interconnection of the underlying spacecraft and environment onboard models. However, with a limited understanding of the assumptions and tradeoffs of modeling to arbitrary extents, designing onboard models to support system-level capabilities presents a significant challenge. In this paper, we provide a detailed analysis of the increasing levels of model fidelity for several key spacecraft subsystems, with the goal of informing future spacecraft functional- and system-level autonomy algorithms and the physics-based simulators on which they are validated. We do not argue for the adoption of a particular fidelity class of models but, instead, highlight the potential tradeoffs and opportunities associated with the use of models for onboard autonomy and in physics-based simulators at various fidelity levels. We ground our analysis in the context of deep space exploration of small bodies, an emerging frontier for autonomous spacecraft operation in space, where the choice of models employed onboard the spacecraft may determine mission success. We conduct our experiments in the Multi-Spacecraft Concept and Autonomy Tool (MuSCAT), a software suite for developing spacecraft autonomy algorithms.}, - asl_address = {Big Sky, Montana}, - asl_month = mar, - asl_url = {https://arxiv.org/abs/2401.11371}, - owner = {agia}, - timestamp = {2024-03-01} + author = {Agia, C. and Vila, {G. C.} and Bandyopadhyay, S. and Bayard, {D. S.} and Cheung, K. and Lee, {C. H.} and Wood, E. and Aenishanslin, I. and Ardito, S. and Fesq, L. and Pavone, M. and Nesnas, {I. A. D.}}, + title = {Modeling Considerations for Developing Deep Space Autonomous Spacecraft and Simulators}, + booktitle = proc_IEEE_AC, + year = {2024}, + abstract = {To extend the limited scope of autonomy used in prior missions for operation in distant and complex environments, there is a need to further develop and mature autonomy that jointly reasons over multiple subsystems, which we term system-level autonomy. System-level autonomy establishes situational awareness that resolves conflicting information across subsystems, which may necessitate the refinement and interconnection of the underlying spacecraft and environment onboard models. However, with a limited understanding of the assumptions and tradeoffs of modeling to arbitrary extents, designing onboard models to support system-level capabilities presents a significant challenge. In this paper, we provide a detailed analysis of the increasing levels of model fidelity for several key spacecraft subsystems, with the goal of informing future spacecraft functional- and system-level autonomy algorithms and the physics-based simulators on which they are validated. We do not argue for the adoption of a particular fidelity class of models but, instead, highlight the potential tradeoffs and opportunities associated with the use of models for onboard autonomy and in physics-based simulators at various fidelity levels. We ground our analysis in the context of deep space exploration of small bodies, an emerging frontier for autonomous spacecraft operation in space, where the choice of models employed onboard the spacecraft may determine mission success. We conduct our experiments in the Multi-Spacecraft Concept and Autonomy Tool (MuSCAT), a software suite for developing spacecraft autonomy algorithms.}, + address = {Big Sky, Montana}, + month = mar, + url = {https://arxiv.org/abs/2401.11371}, + owner = {agia}, + timestamp = {2024-10-30} } -@Article{AgiaSinhaEtAl2024, +@inproceedings{AgiaSinhaEtAl2024, author = {Agia, C. and Sinha, R. and Yang, J. and Cao, Z. and Antonova, R. and Pavone, M. and Jeannette Bohg}, title = {Unpacking Failure Modes of Generative Policies: Runtime Monitoring of Consistency and Progress}, + booktitle = proc_CoRL, year = {2024}, month = nov, abstract = {Robot behavior policies trained via imitation learning are prone to failure under conditions that deviate from their training data. Thus, algorithms that monitor learned policies at test time and provide early warnings of failure are necessary to facilitate scalable deployment. We propose Sentinel, a runtime monitoring framework that splits the detection of failures into two complementary categories: 1) Erratic failures, which we detect using statistical measures of temporal action consistency, and 2) task progression failures, where we use Vision Language Models (VLMs) to detect when the policy confidently and consistently takes actions that do not solve the task. Our approach has two key strengths. First, because learned policies exhibit diverse failure modes, combining complementary detectors leads to significantly higher accuracy at failure detection. Second, using a statistical temporal action consistency measure ensures that we quickly detect when multimodal, generative policies exhibit erratic behavior at negligible computational cost. In contrast, we only use VLMs to detect failure modes that are less time-sensitive. We demonstrate our approach in the context of diffusion policies trained on robotic mobile manipulation domains in both simulation and the real world. By unifying temporal consistency detection and VLM runtime monitoring, Sentinel detects 18\% more failures than using either of the two detectors alone and significantly outperforms baselines, thus highlighting the importance of assigning specialized detectors to complementary categories of failure. Qualitative results are made available at sites.google.com/stanford.edu/sentinel.}, address = {Munich, Germany}, - booktitle = proc_CoRL, keywords = {press}, - owner = {jthluke}, - timestamp = {2024-10-28}, - url = {https://arxiv.org/abs/2410.04640}, + note = {In press}, + owner = {agia}, + timestamp = {2024-10-30}, + url = {https://arxiv.org/abs/2410.04640} } @inproceedings{AbtahiLandryEtAl2019,