% paper.bib
@article{POMDPs.jl,
  author = {Egorov, Maxim and Sunberg, Zachary N. and Balaban, Edward and Wheeler, Tim A. and Gupta, Jayesh K. and Kochenderfer, Mykel J.},
  title = {{POMDPs.jl}: A framework for sequential decision making under uncertainty},
  journal = {Journal of Machine Learning Research},
  volume = {18},
  number = {1},
  pages = {831--835},
  year = {2017},
  month = jan,
  issn = {1532-4435},
  publisher = {JMLR.org}
}
@article{Roy,
  title = {Finding Approximate {POMDP} Solutions Through Belief Compression},
  author = {Roy, Nicholas and Gordon, Geoffrey and Thrun, Sebastian},
  journal = {Journal of Artificial Intelligence Research},
  volume = {23},
  pages = {1--40},
  year = {2005},
  month = jan,
  issn = {1076-9757},
  doi = {10.1613/jair.1496}
}
@article{Kaelbling,
  title = {Planning and acting in partially observable stochastic domains},
  author = {Leslie Pack Kaelbling and Michael L. Littman and Anthony R. Cassandra},
  journal = {Artificial Intelligence},
  volume = {101},
  number = {1},
  pages = {99--134},
  year = {1998},
  issn = {0004-3702},
  doi = {10.1016/S0004-3702(98)00023-X}
}
@article{carbon,
  author = {{Wang}, Yizheng and {Zechner}, Markus and {Wen}, Gege and {Corso}, Anthony Louis and {Mern}, John Michael and {Kochenderfer}, Mykel J. and {Karel Caers}, Jef},
  title = {Optimizing Carbon Storage Operations for Long-Term Safety},
  journal = {arXiv preprint arXiv:2304.09352},
  year = {2023},
  month = apr,
  eprint = {2304.09352},
  archivePrefix = {arXiv},
  primaryClass = {cs.AI},
  doi = {10.48550/arXiv.2304.09352}
}
@article{drugs,
  title = {Optimization of Molecules via Deep Reinforcement Learning},
  author = {Zhou, Zhenpeng and Kearnes, Steven and Li, Li and Zare, Richard N. and Riley, Patrick},
  journal = {Scientific Reports},
  volume = {9},
  number = {1},
  year = {2019},
  month = jul,
  issn = {2045-2322},
  publisher = {Springer Science and Business Media LLC},
  doi = {10.1038/s41598-019-47148-x}
}
@article{planes,
  author = {Larkin Folsom and Masahiro Ono and Kyohei Otsu and Hyoshin Park},
  title = {Scalable information-theoretic path planning for a rover-helicopter team in uncertain environments},
  journal = {International Journal of Advanced Robotic Systems},
  volume = {18},
  number = {2},
  pages = {1729881421999587},
  year = {2021},
  doi = {10.1177/1729881421999587}
}
@inproceedings{markets,
  author = {Jamgochian, Arec L. and Kochenderfer, Mykel J.},
  title = {Stochastic Model Predictive Control for Scheduling Charging of Electric Vehicle Fleets with Market Power},
  booktitle = {2019 IEEE International Conference on Connected Vehicles and Expo (ICCVE)},
  pages = {1--6},
  year = {2019},
  doi = {10.1109/ICCVE45908.2019.8965237}
}
@article{factor,
  title = {Multiple factor analysis},
  author = {Thurstone, Louis Leon},
  journal = {Psychological Review},
  volume = {38},
  number = {5},
  pages = {406--427},
  year = {1931},
  publisher = {Psychological Review Company},
  doi = {10.1037/h0069792}
}
@article{autoencoder,
  title = {Nonlinear principal component analysis using autoassociative neural networks},
  author = {Kramer, Mark A.},
  journal = {AIChE Journal},
  volume = {37},
  number = {2},
  pages = {233--243},
  year = {1991},
  publisher = {Wiley},
  doi = {10.1002/aic.690370209}
}
@article{VAE,
  title = {Auto-encoding variational {B}ayes},
  author = {Kingma, Diederik P. and Welling, Max},
  journal = {arXiv preprint arXiv:1312.6114},
  year = {2013},
  doi = {10.48550/arXiv.1312.6114}
}
@inproceedings{PBVI,
  author = {Pineau, Joelle and Gordon, Geoff and Thrun, Sebastian},
  title = {Point-based value iteration: An anytime algorithm for {POMDP}s},
  booktitle = {International {J}oint {C}onference on {A}rtificial {I}ntelligence},
  series = {IJCAI'03},
  pages = {1025--1030},
  year = {2003},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address = {San Francisco, CA, USA},
  location = {Acapulco, Mexico}
}
@article{perseus,
  title = {Perseus: Randomized point-based value iteration for {POMDP}s},
  author = {Spaan, Matthijs T. J. and Vlassis, Nikos},
  journal = {Journal of Artificial Intelligence Research},
  volume = {24},
  pages = {195--220},
  year = {2005},
  doi = {10.1613/jair.1659}
}
@article{hsvi,
  title = {Point-based {POMDP} algorithms: Improved analysis and implementation},
  author = {Smith, Trey and Simmons, Reid},
  journal = {arXiv preprint arXiv:1207.1412},
  year = {2012},
  doi = {10.48550/arXiv.1207.1412}
}
@article{isomap,
  title = {A global geometric framework for nonlinear dimensionality reduction},
  author = {Tenenbaum, Joshua B. and de Silva, Vin and Langford, John C.},
  journal = {Science},
  volume = {290},
  number = {5500},
  pages = {2319--2323},
  year = {2000},
  publisher = {American Association for the Advancement of Science},
  doi = {10.1126/science.290.5500.2319}
}
@article{kd-trees,
  author = {Bentley, Jon Louis},
  title = {Multidimensional binary search trees used for associative searching},
  journal = {Communications of the ACM},
  volume = {18},
  number = {9},
  pages = {509--517},
  year = {1975},
  month = sep,
  issn = {0001-0782},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  doi = {10.1145/361002.361007}
}
@book{AFDM,
  title = {Algorithms for {D}ecision {M}aking},
  author = {Kochenderfer, Mykel J. and Wheeler, Tim A. and Wray, Kyle H.},
  isbn = {9780262370233},
  url = {https://books.google.com/books?id=zKtaEAAAQBAJ},
  year = {2022},
  publisher = {MIT Press}
}
@article{Julia,
  title = {Julia: A Fast Dynamic Language for Technical Computing},
  author = {Jeff Bezanson and Stefan Karpinski and Viral B. Shah and Alan Edelman},
  journal = {arXiv preprint arXiv:1209.5145},
  year = {2012},
  eprint = {1209.5145},
  archivePrefix = {arXiv},
  primaryClass = {cs.PL},
  doi = {10.48550/arXiv.1209.5145}
}
@inproceedings{SARSOP,
  author = {Kurniawati, Hanna and Hsu, David and Lee, Wee Sun},
  title = {{SARSOP}: Efficient Point-Based {POMDP} Planning by Approximating Optimally Reachable Belief Spaces},
  booktitle = {Robotics: {S}cience and {S}ystems},
  year = {2008},
  month = jun,
  address = {Zurich, Switzerland},
  doi = {10.15607/RSS.2008.IV.009}
}
@article{despot,
  title = {{DESPOT}: Online {POMDP} planning with regularization},
  author = {Somani, Adhiraj and Ye, Nan and Hsu, David and Lee, Wee Sun},
  journal = {Advances in Neural Information Processing Systems},
  volume = {26},
  year = {2013}
}
@inproceedings{sunberg2018online,
  title = {Online algorithms for {POMDP}s with continuous state, action, and observation spaces},
  author = {Sunberg, Zachary and Kochenderfer, Mykel},
  booktitle = {International Conference on Automated Planning and Scheduling},
  volume = {28},
  pages = {259--263},
  year = {2018},
  doi = {10.1609/icaps.v28i1.13882}
}
@article{pomcp,
  title = {Monte-{C}arlo planning in large {POMDP}s},
  author = {Silver, David and Veness, Joel},
  journal = {Advances in Neural Information Processing Systems},
  volume = {23},
  year = {2010}
}
@inproceedings{mcts,
  title = {Bandit based {M}onte-{C}arlo planning},
  author = {Kocsis, Levente and Szepesv{\'a}ri, Csaba},
  booktitle = {European Conference on Machine Learning},
  pages = {282--293},
  year = {2006},
  organization = {Springer},
  doi = {10.1007/11871842_29}
}
@inproceedings{AEMS,
  title = {{AEMS}: An anytime online search algorithm for approximate policy refinement in large {POMDP}s},
  author = {Ross, St{\'e}phane and Chaib-Draa, Brahim},
  booktitle = {International {J}oint {C}onference on {A}rtificial {I}ntelligence},
  pages = {2592--2598},
  year = {2007}
}
@inproceedings{EPCA,
  author = {Collins, Michael and Dasgupta, Sanjoy and Schapire, Robert E.},
  title = {A Generalization of Principal Components Analysis to the Exponential Family},
  booktitle = {Advances in {N}eural {I}nformation {P}rocessing {S}ystems},
  volume = {14},
  year = {2001},
  url = {https://proceedings.neurips.cc/paper_files/paper/2001/file/f410588e48dc83f2822a880a68f78923-Paper.pdf},
  doi = {10.7551/mitpress/1120.003.0084}
}
@misc{epca-MATLAB,
  title = {E-PCA},
  author = {Guillaume de Chambrier},
  year = {2016},
  url = {https://github.com/gpldecha/e-pca}
}
@article{complexity1,
  author = {Papadimitriou, Christos H. and Tsitsiklis, John N.},
  title = {The Complexity of {M}arkov Decision Processes},
  journal = {Mathematics of Operations Research},
  volume = {12},
  number = {3},
  pages = {441--450},
  year = {1987},
  month = aug,
  issn = {0364-765X},
  publisher = {INFORMS},
  doi = {10.1287/moor.12.3.441}
}
@article{complexity2,
  title = {On the undecidability of probabilistic planning and related stochastic optimization problems},
  author = {Omid Madani and Steve Hanks and Anne Condon},
  journal = {Artificial Intelligence},
  volume = {147},
  number = {1},
  pages = {5--34},
  year = {2003},
  note = {Planning with Uncertainty and Incomplete Information},
  issn = {0004-3702},
  doi = {10.1016/S0004-3702(02)00378-8}
}
@inproceedings{grid,
  author = {Davies, Scott},
  title = {Multidimensional Triangulation and Interpolation for Reinforcement Learning},
  booktitle = {Advances in {N}eural {I}nformation {P}rocessing {S}ystems},
  editor = {M. C. Mozer and M. Jordan and T. Petsche},
  volume = {9},
  year = {1996},
  publisher = {MIT Press},
  url = {https://proceedings.neurips.cc/paper_files/paper/1996/file/310ce61c90f3a46e340ee8257bc70e93-Paper.pdf}
}
@article{kNN,
  title = {Discriminatory Analysis. Nonparametric Discrimination: Consistency Properties},
  author = {Evelyn Fix and J. L. Hodges},
  journal = {International Statistical Review},
  volume = {57},
  number = {3},
  pages = {238--247},
  year = {1989},
  publisher = {Wiley, International Statistical Institute (ISI)},
  issn = {0306-7734},
  url = {http://www.jstor.org/stable/1403797},
  doi = {10.2307/1403797}
}
@article{PCA,
  title = {Analysis of a complex of statistical variables into principal components},
  author = {Harold Hotelling},
  journal = {Journal of Educational Psychology},
  volume = {24},
  pages = {498--520},
  year = {1933},
  doi = {10.1037/h0070888}
}
@article{kernelPCA,
  author = {Schölkopf, Bernhard and Smola, Alexander and Müller, Klaus-Robert},
  title = {Nonlinear Component Analysis as a Kernel Eigenvalue Problem},
  journal = {Neural Computation},
  volume = {10},
  number = {5},
  pages = {1299--1319},
  year = {1998},
  doi = {10.1162/089976698300017467}
}
@article{PPCA,
  author = {Tipping, Michael E. and Bishop, Christopher M.},
  title = {Probabilistic Principal Component Analysis},
  journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume = {61},
  number = {3},
  pages = {611--622},
  year = {1999},
  issn = {1369-7412},
  doi = {10.1111/1467-9868.00196}
}
@incollection{error_bound,
  author = {Geoffrey J. Gordon},
  title = {Stable Function Approximation in Dynamic Programming},
  booktitle = {Machine {L}earning {P}roceedings 1995},
  editor = {Armand Prieditis and Stuart Russell},
  pages = {261--268},
  year = {1995},
  publisher = {Morgan Kaufmann},
  address = {San Francisco (CA)},
  isbn = {978-1-55860-377-6},
  doi = {10.1016/B978-1-55860-377-6.50040-2}
}
@article{flux,
  author = {Mike Innes},
  title = {Flux: Elegant Machine Learning with {J}ulia},
  journal = {Journal of Open Source Software},
  volume = {3},
  number = {25},
  pages = {602},
  year = {2018},
  doi = {10.21105/joss.00602}
}
@article{belief-state-MDP,
  title = {Optimal control of {M}arkov processes with incomplete state information},
  author = {Karl Johan Åström},
  journal = {Journal of Mathematical Analysis and Applications},
  volume = {10},
  number = {1},
  pages = {174--205},
  year = {1965},
  issn = {0022-247X},
  doi = {10.1016/0022-247X(65)90154-X}
}