From c7e6810de30386eb4b07b9fb0daf4540e58b6aa6 Mon Sep 17 00:00:00 2001 From: Edward Balaban Date: Sun, 7 Feb 2016 00:19:11 -0800 Subject: [PATCH 01/30] Templated types and function definitions --- src/POMDPs.jl | 5 +++-- src/belief.jl | 14 +++++++------- src/distribution.jl | 18 +++++++++--------- src/policy.jl | 18 +++++++++--------- src/pomdp.jl | 41 +++++++++++++++++++++++------------------ src/simulator.jl | 4 ++-- src/solver.jl | 6 +++--- src/space.jl | 26 +++++++++++++------------- 8 files changed, 69 insertions(+), 63 deletions(-) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index ae118f45..9cf65e27 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -1,6 +1,6 @@ module POMDPs -import Base.rand! +import Base.rand export # Abstract type @@ -21,6 +21,7 @@ export observation, reward, isterminal, + isterminal_obs, # Need below?; create_state, @@ -93,7 +94,7 @@ include("space.jl") include("solver.jl") include("policy.jl") include("simulator.jl") -include("docs.jl") +#include("docs.jl") end diff --git a/src/belief.jl b/src/belief.jl index 486aafe3..5ae0b770 100644 --- a/src/belief.jl +++ b/src/belief.jl @@ -6,20 +6,20 @@ # For discrete problems, it can be usually be represented as a vector. # For tools supportng belief updates see POMDPToolbox.jl -abstract Belief <: AbstractDistribution -abstract BeliefUpdater +abstract Belief{T} <: AbstractDistribution{T} +abstract BeliefUpdater{S,A,O} # returns an example initial belief for the pomdp -@pomdp_func initial_belief(pomdp::POMDP, belief::Belief = create_belief(pomdp)) +@pomdp_func initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S} = create_belief(pomdp)) # allocates and returns an empty problem-native belief structure -@pomdp_func create_belief(pomdp::POMDP) +@pomdp_func create_belief{S,A,O}(pomdp::POMDP{S,A,O}) # creates a belief object of the type used by `updater` (for preallocation purposes) -@pomdp_func create_belief(updater::BeliefUpdater) +@pomdp_func create_belief{S,A,O}(updater::BeliefUpdater{S,A,O}) # updates the belief given the old belief (belief_old), the action and the observation -@pomdp_func update(updater::BeliefUpdater, belief_old::Belief, action::Action, obs::Observation, belief_new::Belief=create_belief(updater)) +@pomdp_func update{S,A,O}(updater::BeliefUpdater{S,A,O}, belief_old::Belief{S}, action::A, obs::O, belief_new::Belief{S}=create_belief(updater)) # returns a belief that can be updated using `updater` that has a similar distribution to `b` (this conversion may be lossy) -@pomdp_func convert_belief(updater::BeliefUpdater, belief::Belief, new_belief::Belief=create_belief(updater)) = belief +@pomdp_func convert_belief{S,A,O}(updater::BeliefUpdater{S,A,O}, belief::Belief{S}, new_belief::Belief{S}=create_belief(updater)) = belief diff --git a/src/distribution.jl b/src/distribution.jl index bee63d8d..9eeb7d35 100644 --- a/src/distribution.jl +++ b/src/distribution.jl @@ -4,14 +4,14 @@ # DiscreteDistribution: discrete distributions support state indexing and length functions ################################################################# -@pomdp_func create_transition_distribution(pomdp::POMDP) -@pomdp_func create_observation_distribution(pomdp::POMDP) -@pomdp_func rand(rng::AbstractRNG, state::Any, d::AbstractDistribution) -@pomdp_func pdf(d::AbstractDistribution, x::Any) -@pomdp_func domain(d::AbstractDistribution) +@pomdp_func create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func rand{T}(rng::AbstractRNG, 
d::AbstractDistribution{T}, sample::T) +@pomdp_func pdf{T}(d::AbstractDistribution{T}, x::T) +@pomdp_func domain{T}(d::AbstractDistribution{T}) -abstract DiscreteDistribution <: AbstractDistribution +abstract DiscreteDistribution{T} <: AbstractDistribution{T} -@pomdp_func Base.length(d::DiscreteDistribution) -@pomdp_func weight(d::DiscreteDistribution, i::Int) -@pomdp_func index(pomdp::POMDP, d::DiscreteDistribution, i::Int) +@pomdp_func Base.length{T}(d::DiscreteDistribution{T}) +@pomdp_func weight{T}(d::DiscreteDistribution{T}, i::Int) +@pomdp_func index{S,A,O,T}(pomdp::POMDP{S,A,O}, d::DiscreteDistribution{T}, i::Int) diff --git a/src/policy.jl b/src/policy.jl index 11b69c10..4c8d7e36 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -4,21 +4,21 @@ # The policy is extracted through calls to the action() function. ################################################################# -abstract Policy +abstract Policy{S,A,O} # creates an action object (for preallocation purposes) -@pomdp_func create_action(pomdp::POMDP) +#@pomdp_func create_action{S,A,O}(pomdp::POMDP{S,A,O}) # returns a default BeliefUpdater appropriate for a belief type that policy `p` can use -@pomdp_func action(p::Policy, s::State, a::Action) -@pomdp_func action(p::Policy, s::State) -@pomdp_func action(p::Policy, b::Belief, a::Action) -@pomdp_func action(p::Policy, b::Belief) +@pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S, a::A) +@pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S) +@pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}, a::A) +@pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}) # returns a default BeliefUpdater appropriate for a belief type that policy `p` can use -@pomdp_func updater(policy::Policy) +@pomdp_func updater{S,A,O}(policy::Policy{S,A,O}) # returns the utility value from policy p given the belief -@pomdp_func value(p::Policy, belief::Belief) +@pomdp_func value{S,A,O}(p::Policy{S,A,O}, belief::Belief{S}) # returns the utility value from policy p given the state -@pomdp_func value(p::Policy, state::State) +@pomdp_func value{S,A,O}(p::Policy{S,A,O}, state::S) diff --git a/src/pomdp.jl b/src/pomdp.jl index e2f32cd4..5f9472c6 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -1,31 +1,36 @@ # POMDP model functions -abstract POMDP +abstract POMDP{S,A,O} -abstract State -abstract Action -abstract Observation +# abstract State +# abstract Action +# abstract Observation typealias Reward Float64 -abstract AbstractDistribution +abstract AbstractDistribution{T} # return the space sizes -@pomdp_func n_states(pomdp::POMDP) -@pomdp_func n_actions(pomdp::POMDP) -@pomdp_func n_observations(pomdp::POMDP) +@pomdp_func n_states{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func n_actions{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func n_observations{S,A,O}(pomdp::POMDP{S,A,O}) # return the discount factor -@pomdp_func discount(pomdp::POMDP) +@pomdp_func discount{S,A,O}(pomdp::POMDP{S,A,O}) -@pomdp_func transition(pomdp::POMDP, state::State, action::Action, distribution::AbstractDistribution=create_transition_distribution(pomdp)) -@pomdp_func observation(pomdp::POMDP, state::State, action::Action, statep::State, distribution::AbstractDistribution=create_observation_distribution(pomdp)) -@pomdp_func observation(pomdp::POMDP, state::State, action::Action, distribution::AbstractDistribution=create_observation_distribution(pomdp)) -@pomdp_func reward(pomdp::POMDP, state::State, action::Action, statep::State) +@pomdp_func transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, 
distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) +@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) +@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) +@pomdp_func reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) -@pomdp_func create_state(pomdp::POMDP) -@pomdp_func create_observation(pomdp::POMDP) +#@pomdp_func create_state{S,A,O}(pomdp::POMDP{S,A,O}) +#@pomdp_func create_observation{S,A,O}(pomdp::POMDP{S,A,O}) -@pomdp_func isterminal(pomdp::POMDP, state::State) = false -@pomdp_func isterminal(pomdp::POMDP, observation::Observation) = false +@pomdp_func isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) = false +@pomdp_func isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) = false -@pomdp_func index(pomdp::POMDP, state::State) +# @pomdp_func isterminal(pomdp::POMDP, observation::Any) = false +# @pomdp_func isterminal_obs(pomdp::POMDP, state::Any) = false + +@pomdp_func index{S,A,O}(pomdp::POMDP{S,A,O}, x::Any) +# @pomdp_func index{S,A,O}(pomdp::POMDP{S,A,O}, action::A) +# @pomdp_func index{S,A,O}(pomdp::POMDP{S,A,O}, obs::O) diff --git a/src/simulator.jl b/src/simulator.jl index 1db1fa48..ed9d31a1 100644 --- a/src/simulator.jl +++ b/src/simulator.jl @@ -5,7 +5,7 @@ ################################################################# # Base type for an object defining how a simulation should be carried out -abstract Simulator +abstract Simulator{S,A,O} # runs a simulation using the specified policy and returns the accumulated reward -@pomdp_func simulate(simulator::Simulator, pomdp::POMDP, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) +@pomdp_func simulate{S,A,O}(simulator::Simulator{S,A,O}, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) diff --git a/src/solver.jl b/src/solver.jl index b96e6930..799c628f 100644 --- a/src/solver.jl +++ b/src/solver.jl @@ -1,5 +1,5 @@ -abstract Solver +abstract Solver{S,A,O} -create_policy(solver::Solver, pomdp::POMDP) = error("$(typeof(pomdp)) does not implement create_policy") -solve(solver::Solver, pomdp::POMDP, policy=create_policy(solver, pomdp)) = error("$(typeof(solver)) does not implement solve for model $(typeof(pomdp))") +create_policy{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}) = error("$(typeof(pomdp)) does not implement create_policy") +solve{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) = error("$(typeof(solver)) does not implement solve for model $(typeof(pomdp))") diff --git a/src/space.jl b/src/space.jl index 6d5ad89c..40869305 100644 --- a/src/space.jl +++ b/src/space.jl @@ -3,24 +3,24 @@ # AbstractSpace: the abstract super type for the state, action and observation spaces ################################################################# -abstract AbstractSpace +abstract AbstractSpace{T} # returns an integer -@pomdp_func dimensions(s::AbstractSpace) +@pomdp_func dimensions{T}(s::AbstractSpace{T}) # returns bound of dim i -@pomdp_func lowerbound(s::AbstractSpace, i::Int) +@pomdp_func lowerbound{T}(s::AbstractSpace{T}, i::Int) # returns bound of dim i -@pomdp_func upperbound(s::AbstractSpace, i::Int) +@pomdp_func upperbound{T}(s::AbstractSpace{T}, i::Int) # sample a space and return the sample -@pomdp_func rand(rng::AbstractRNG, state::Any, d::AbstractSpace) 
+@pomdp_func rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, state::T) # return an iterable object corresponding to the space -@pomdp_func iterator(s::AbstractSpace) +@pomdp_func iterator{T}(s::AbstractSpace{T}) # return a space type -@pomdp_func states(pomdp::POMDP) -@pomdp_func states(pomdp::POMDP, state::State, sts::AbstractSpace=states(pomdp)) -@pomdp_func actions(pomdp::POMDP) -@pomdp_func actions(pomdp::POMDP, state::State, acts::AbstractSpace=actions(pomdp)) -@pomdp_func actions(pomdp::POMDP, belief::Belief, acts::AbstractSpace=actions(pomdp)) -@pomdp_func observations(pomdp::POMDP) -@pomdp_func observations(pomdp::POMDP, state::State, obs::AbstractSpace=observations(pomdp)) +@pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}, state::S, sts::AbstractSpace{S}=states(pomdp)) +@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, acts::AbstractSpace{A}=actions(pomdp)) +@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, acts::AbstractSpace{A}=actions(pomdp)) +@pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) From 2c10c1a87ff3b3e6f72677e052e1ce730dc2d849 Mon Sep 17 00:00:00 2001 From: Edward Balaban Date: Sun, 7 Feb 2016 00:30:55 -0800 Subject: [PATCH 02/30] Merged changes --- README.md | 10 +++++----- src/distribution.jl | 18 +++++++++--------- src/space.jl | 26 +++++++++++++------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 8ea8bc86..a7150cc3 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ The core interface provides tools to express problems, program solvers, and setu `AbstractDistribution` - Base type for a probability distribution -- `rand(rng::AbstractRNG, sample, d::AbstractDistribution)` fill with random sample from distribution and return the sample +- `rand(rng::AbstractRNG, d::AbstractDistribution, sample::Any)` fill with random sample from distribution and return the sample - `pdf(d::AbstractDistribution, x)` value of probability distribution function at x **XXX** There are functions missing from this list that are included in `src/distribution.jl` @@ -109,7 +109,7 @@ Several convenience functions are also provided in the interface to provide stan - `dimensions(s::AbstractSpace)` returns the number (integer) of dimensions in a space - `lowerbound(s::AbstractSpace, i::Int)` returns the lower bound of dimension `i` - `upperbound(s::AbstractSpace, i::Int)` returns the upper bound of dimension `i` -- `rand(rng::AbstractRNG, state::Any, d::AbstractSpace)` fill with random sample from space and return the sample +- `rand(rng::AbstractRNG, d::AbstractSpace, sample::Any)` fill with random sample from space and return the sample - `value(policy::Policy, belief::Belief)` returns the utility value from policy p given the belief - `value(policy::Policy, state::State)` returns the utility value from policy p given the state - `convert_belief(updater::BeliefUpdater, b::Belief)` returns a belief that can be updated using `updater` that has a similar distribution to `b` (this conversion may be lossy) @@ -143,7 +143,7 @@ function simulate(simulator::ReferenceSimulator, pomdp::POMDP, policy::Policy, u s = create_state(pomdp) o = create_observation(pomdp) - rand(sim.rng, s, initial_belief) + rand(sim.rng, initial_belief, s) b = convert_belief(updater, initial_belief) @@ -156,12 +156,12 @@ function 
simulate(simulator::ReferenceSimulator, pomdp::POMDP, policy::Policy, u sp = create_state(pomdp) trans_dist = transition(pomdp, s, a) - rand(sim.rng, sp, trans_dist) + rand(sim.rng, trans_dist, sp) r += disc*reward(pomdp, s, a, sp) obs_dist = observation(pomdp, s, a, sp) - rand(sim.rng, o, obs_dist) + rand(sim.rng, obs_dist, o) b = update(updater, b, a, o) diff --git a/src/distribution.jl b/src/distribution.jl index 9eeb7d35..ca633aab 100644 --- a/src/distribution.jl +++ b/src/distribution.jl @@ -4,14 +4,14 @@ # DiscreteDistribution: discrete distributions support state indexing and length functions ################################################################# -@pomdp_func create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) -@pomdp_func create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) -@pomdp_func rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) -@pomdp_func pdf{T}(d::AbstractDistribution{T}, x::T) -@pomdp_func domain{T}(d::AbstractDistribution{T}) +@pomdp_func create_transition_distribution(pomdp::POMDP) +@pomdp_func create_observation_distribution(pomdp::POMDP) +@pomdp_func rand(rng::AbstractRNG, d::AbstractDistribution, sample::Any) +@pomdp_func pdf(d::AbstractDistribution, x::Any) +@pomdp_func domain(d::AbstractDistribution) -abstract DiscreteDistribution{T} <: AbstractDistribution{T} +abstract DiscreteDistribution <: AbstractDistribution -@pomdp_func Base.length{T}(d::DiscreteDistribution{T}) -@pomdp_func weight{T}(d::DiscreteDistribution{T}, i::Int) -@pomdp_func index{S,A,O,T}(pomdp::POMDP{S,A,O}, d::DiscreteDistribution{T}, i::Int) +@pomdp_func Base.length(d::DiscreteDistribution) +@pomdp_func weight(d::DiscreteDistribution, i::Int) +@pomdp_func index(pomdp::POMDP, d::DiscreteDistribution, i::Int) diff --git a/src/space.jl b/src/space.jl index 40869305..550788dd 100644 --- a/src/space.jl +++ b/src/space.jl @@ -3,24 +3,24 @@ # AbstractSpace: the abstract super type for the state, action and observation spaces ################################################################# -abstract AbstractSpace{T} +abstract AbstractSpace # returns an integer -@pomdp_func dimensions{T}(s::AbstractSpace{T}) +@pomdp_func dimensions(s::AbstractSpace) # returns bound of dim i -@pomdp_func lowerbound{T}(s::AbstractSpace{T}, i::Int) +@pomdp_func lowerbound(s::AbstractSpace, i::Int) # returns bound of dim i -@pomdp_func upperbound{T}(s::AbstractSpace{T}, i::Int) +@pomdp_func upperbound(s::AbstractSpace, i::Int) # sample a space and return the sample -@pomdp_func rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, state::T) +@pomdp_func rand(rng::AbstractRNG, d::AbstractSpace, state::Any) # return an iterable object corresponding to the space -@pomdp_func iterator{T}(s::AbstractSpace{T}) +@pomdp_func iterator(s::AbstractSpace) # return a space type -@pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}) -@pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}, state::S, sts::AbstractSpace{S}=states(pomdp)) -@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}) -@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, acts::AbstractSpace{A}=actions(pomdp)) -@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, acts::AbstractSpace{A}=actions(pomdp)) -@pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}) -@pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) +@pomdp_func states(pomdp::POMDP) +@pomdp_func states(pomdp::POMDP, state::State, sts::AbstractSpace=states(pomdp)) +@pomdp_func actions(pomdp::POMDP) +@pomdp_func 
actions(pomdp::POMDP, state::State, acts::AbstractSpace=actions(pomdp)) +@pomdp_func actions(pomdp::POMDP, belief::Belief, acts::AbstractSpace=actions(pomdp)) +@pomdp_func observations(pomdp::POMDP) +@pomdp_func observations(pomdp::POMDP, state::State, obs::AbstractSpace=observations(pomdp)) From f60af460e53c3db251f69e9c159eb3be57c77a2e Mon Sep 17 00:00:00 2001 From: Edward Balaban Date: Sun, 7 Feb 2016 14:16:43 -0800 Subject: [PATCH 03/30] Updated index functions. --- src/POMDPs.jl | 4 +++- src/pomdp.jl | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 9cf65e27..36886284 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -35,7 +35,9 @@ export # Discrete Functions length, - index, + state_index, + action_index, + obs_index, weight, # Common Functions diff --git a/src/pomdp.jl b/src/pomdp.jl index 5f9472c6..a9fdc9ab 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -31,6 +31,6 @@ abstract AbstractDistribution{T} # @pomdp_func isterminal(pomdp::POMDP, observation::Any) = false # @pomdp_func isterminal_obs(pomdp::POMDP, state::Any) = false -@pomdp_func index{S,A,O}(pomdp::POMDP{S,A,O}, x::Any) -# @pomdp_func index{S,A,O}(pomdp::POMDP{S,A,O}, action::A) -# @pomdp_func index{S,A,O}(pomdp::POMDP{S,A,O}, obs::O) +@pomdp_func state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S) +@pomdp_func action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A) +@pomdp_func obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) From 5dcf27fb8bf9e817e97046f22c4172d12e565eb8 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Wed, 16 Mar 2016 12:11:16 -0700 Subject: [PATCH 04/30] got rid of types and functions that are now unnecessary --- src/POMDPs.jl | 18 ++---------------- src/pomdp.jl | 5 +---- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 36886284..29248850 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -5,7 +5,6 @@ import Base.rand export # Abstract type POMDP, - DiscretePOMDP, # Discrete Functions n_states, @@ -23,11 +22,6 @@ export isterminal, isterminal_obs, - # Need below?; - create_state, - create_observation, - create_action, - # Spaces, Distributions and accessor functions AbstractDistribution, DiscreteDistribution, @@ -57,18 +51,10 @@ export Solver, solve, - # States - State, - - # Actions - Action, - create_action, - - # Observations - Observation, - + #= # Rewards Reward, + =# # Beliefs Belief, diff --git a/src/pomdp.jl b/src/pomdp.jl index a9fdc9ab..fca92a7c 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -2,10 +2,7 @@ abstract POMDP{S,A,O} -# abstract State -# abstract Action -# abstract Observation -typealias Reward Float64 +# typealias Reward Float64 abstract AbstractDistribution{T} From 94a342cd33e07bb499f21ca1fc5232fdfa2c789e Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Thu, 17 Mar 2016 14:57:33 -0700 Subject: [PATCH 05/30] removed Reward typealias --- src/pomdp.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pomdp.jl b/src/pomdp.jl index fca92a7c..022589f5 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -2,8 +2,6 @@ abstract POMDP{S,A,O} -# typealias Reward Float64 - abstract AbstractDistribution{T} # return the space sizes From 8bfee2081910059b123f059905d0196c12b312fe Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Tue, 22 Mar 2016 15:40:59 -0700 Subject: [PATCH 06/30] added MDP alias --- src/pomdp.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pomdp.jl b/src/pomdp.jl index 022589f5..14d212e9 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -1,6 +1,7 @@ # POMDP model 
functions abstract POMDP{S,A,O} +typealias MDP{S,A} POMDP{S,A,S} abstract AbstractDistribution{T} From 6e2cba9d393d603f33677af855a2fdebc8b5b810 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Wed, 23 Mar 2016 17:14:58 -0700 Subject: [PATCH 07/30] exported MDP --- src/POMDPs.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 3b6ee070..29c373b0 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -5,6 +5,7 @@ import Base.rand export # Abstract type POMDP, + MDP, # Discrete Functions n_states, From 8eba0c2850b48cf272604479352ff09ed57b7636 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 25 Mar 2016 15:18:42 -0700 Subject: [PATCH 08/30] added default_constructors.jl back in after merge took it out --- src/POMDPs.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index ca5985dd..29c373b0 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -80,6 +80,7 @@ export include("errors.jl") include("constants.jl") include("utils.jl") +include("default_constructors.jl") include("pomdp.jl") include("distribution.jl") include("belief.jl") From a0ca88c1db6307be4282cf01a5ca1863c6d3a501 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 25 Mar 2016 15:21:47 -0700 Subject: [PATCH 09/30] actually added the default_constructors file --- src/default_constructors.jl | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 src/default_constructors.jl diff --git a/src/default_constructors.jl b/src/default_constructors.jl new file mode 100644 index 00000000..beb3bcb0 --- /dev/null +++ b/src/default_constructors.jl @@ -0,0 +1,5 @@ +# implements some default zero-argument constructors for bitstypes that do not have them (see issue #65) + +Base.Bool() = zero(Bool) +Base.Int() = zero(Int) +Base.Float64() = zero(Float64) From e8c45324bfc6d253ba6cc152d26ec12c466a0614 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Tue, 29 Mar 2016 11:50:53 -0700 Subject: [PATCH 10/30] changed MDP type heirarchy (see #60) --- src/pomdp.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pomdp.jl b/src/pomdp.jl index 14d212e9..0e0d4d9e 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -1,7 +1,7 @@ # POMDP model functions abstract POMDP{S,A,O} -typealias MDP{S,A} POMDP{S,A,S} +abstract MDP{S,A} <: POMDP{S,A,S} abstract AbstractDistribution{T} From 3bcaeb81fd3b800b11b86626caccfba88ef66070 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Thu, 31 Mar 2016 15:15:33 -0700 Subject: [PATCH 11/30] added a few docstrings just to make sure they work with @pomdp_func --- src/pomdp.jl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/pomdp.jl b/src/pomdp.jl index 0e0d4d9e..75b4c52c 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -1,6 +1,19 @@ # POMDP model functions +""" +Abstract base type for a partially observable Markov decision process. + S: state type + A: action type + O: observation type +""" abstract POMDP{S,A,O} + +""" +Abstract base type for a fully observable Markov decision process. + + S: state type + A: action type +""" abstract MDP{S,A} <: POMDP{S,A,S} abstract AbstractDistribution{T} @@ -10,7 +23,11 @@ abstract AbstractDistribution{T} @pomdp_func n_actions{S,A,O}(pomdp::POMDP{S,A,O}) @pomdp_func n_observations{S,A,O}(pomdp::POMDP{S,A,O}) -# return the discount factor +""" + discount{S,A,O}(pomdp::POMDP{S,A,O}) + +Return the discount factor for the problem. 
+""" @pomdp_func discount{S,A,O}(pomdp::POMDP{S,A,O}) @pomdp_func transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) From d710a749a480359458e154eb97ff6f54ed227e72 Mon Sep 17 00:00:00 2001 From: Maxim Egorov Date: Mon, 4 Apr 2016 11:17:02 -0700 Subject: [PATCH 12/30] doc update --- src/POMDPs.jl | 2 +- src/docs.jl | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 29c373b0..21434071 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -88,7 +88,7 @@ include("space.jl") include("solver.jl") include("policy.jl") include("simulator.jl") -#include("docs.jl") +include("docs.jl") end diff --git a/src/docs.jl b/src/docs.jl index 9b6de446..2dd612cc 100644 --- a/src/docs.jl +++ b/src/docs.jl @@ -19,24 +19,6 @@ Base type for state, action and observation spaces AbstractSpace -""" -Base type for states -""" -State - - -""" -Base type for actions -""" -Action - - -""" -Base type for observation -""" -Observation - - """ states(pomdp::POMDP) From aa18f829f70e7c7a7a431b28bd0eefb8c6b36307 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Tue, 5 Apr 2016 12:59:53 -0700 Subject: [PATCH 13/30] removed unneeded parameterizations --- src/policy.jl | 20 ++++++++------------ src/simulator.jl | 4 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/policy.jl b/src/policy.jl index 4c8d7e36..2a1be9a9 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -4,21 +4,17 @@ # The policy is extracted through calls to the action() function. ################################################################# -abstract Policy{S,A,O} +abstract Policy -# creates an action object (for preallocation purposes) -#@pomdp_func create_action{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func action(p::Policy, s, a) +@pomdp_func action(p::Policy, s) +@pomdp_func action(p::Policy, b::Belief, a) +@pomdp_func action(p::Policy, b::Belief) # returns a default BeliefUpdater appropriate for a belief type that policy `p` can use -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S, a::A) -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S) -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}, a::A) -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}) - -# returns a default BeliefUpdater appropriate for a belief type that policy `p` can use -@pomdp_func updater{S,A,O}(policy::Policy{S,A,O}) +@pomdp_func updater(policy::Policy) # returns the utility value from policy p given the belief -@pomdp_func value{S,A,O}(p::Policy{S,A,O}, belief::Belief{S}) +@pomdp_func value(p::Policy, belief::Belief) # returns the utility value from policy p given the state -@pomdp_func value{S,A,O}(p::Policy{S,A,O}, state::S) +@pomdp_func value(p::Policy, state) diff --git a/src/simulator.jl b/src/simulator.jl index ed9d31a1..25252ef7 100644 --- a/src/simulator.jl +++ b/src/simulator.jl @@ -5,7 +5,7 @@ ################################################################# # Base type for an object defining how a simulation should be carried out -abstract Simulator{S,A,O} +abstract Simulator # runs a simulation using the specified policy and returns the accumulated reward -@pomdp_func simulate{S,A,O}(simulator::Simulator{S,A,O}, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) +@pomdp_func simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) From 9474f6e3bb2d8ee2341690a5a9a047103deeb8b7 
Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Tue, 5 Apr 2016 14:38:08 -0700 Subject: [PATCH 14/30] tested macros, resoved some ambiguities, closes #38 --- README.md | 3 +++ src/POMDPs.jl | 3 +++ src/errors.jl | 16 ++++++++++------ src/policy.jl | 6 ++---- src/pomdp.jl | 2 +- test/runtests.jl | 15 +++++++++++++-- 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 96cffda3..3223ff6e 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ The goal is to provide a common programming vocabulary for researchers and stude 2. Writing solver software. 3. Running simulations efficiently. +For problems and solvers that only use a generative model (rather than explicit transition and observation distributions), see also [GenerativeModels.jl](https://github.com/JuliaPOMDP/GenerativeModels.jl). ## Installation ```julia @@ -34,7 +35,9 @@ using POMDPs # the following command adds the SARSOP solver, you can add any supported solver this way POMDPs.add("SARSOP") ``` +## Documentation +TODO: Link to documentation ## Tutorials diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 29c373b0..0b7d3e25 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -1,3 +1,6 @@ +""" +Provides a basic interface for defining and solving MDPs/POMDPs +""" module POMDPs import Base.rand diff --git a/src/errors.jl b/src/errors.jl index 47dd0f48..bfba5a15 100644 --- a/src/errors.jl +++ b/src/errors.jl @@ -3,6 +3,9 @@ # throws an error if the interface function is not implemented. ############################################################### +""" +Provide a default function implementation that throws an error when called. +""" macro pomdp_func(signature) if signature.head == :(=) # in this case a default implementation has already been supplied return esc(signature) @@ -34,17 +37,18 @@ macro pomdp_func(signature) body = Expr(:call, :error, parse("\"$error_string\"")) - return Expr(:function, esc(signature), body) + return Expr(:function, esc(signature), esc(body)) end -# strip_arg strips anything extra (type annotations, default values, etc) from an argument -# for now this cannot handle keyword arguments (it will throw an error) +""" +Strip anything extra (type annotations, default values, etc) from an argument. +For now this cannot handle keyword arguments (it will throw an error). 
+""" strip_arg(arg::Symbol) = arg # once we have a symbol, we have stripped everything, so we can just return it - function strip_arg(arg_expr::Expr) - if arg_expr.head == :parameters # keywork argument - error("extract_arg_names can't handle keyword args yet (parsing arg expression $(arg_expr))") + if arg_expr.head == :parameters # keyword argument + error("strip_arg can't handle keyword args yet (parsing arg expression $(arg_expr))") elseif arg_expr.head == :(::) # argument is type annotated, remove the annotation return strip_arg(arg_expr.args[1]) elseif arg_expr.head == :kw # argument has a default value, remove the default diff --git a/src/policy.jl b/src/policy.jl index 2a1be9a9..5c799bca 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -6,10 +6,8 @@ abstract Policy -@pomdp_func action(p::Policy, s, a) -@pomdp_func action(p::Policy, s) -@pomdp_func action(p::Policy, b::Belief, a) -@pomdp_func action(p::Policy, b::Belief) +@pomdp_func action(p::Policy, state_or_belief, action) +@pomdp_func action(p::Policy, state_or_belief) # returns a default BeliefUpdater appropriate for a belief type that policy `p` can use @pomdp_func updater(policy::Policy) diff --git a/src/pomdp.jl b/src/pomdp.jl index 75b4c52c..95bed95f 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -28,7 +28,7 @@ abstract AbstractDistribution{T} Return the discount factor for the problem. """ -@pomdp_func discount{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func discount(pomdp::POMDP) @pomdp_func transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) @pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) diff --git a/test/runtests.jl b/test/runtests.jl index 91a5b399..a2951703 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,2 +1,13 @@ -# just test to see if the import works -import POMDPs +using Base.Test + +using POMDPs +type A <: POMDP{Bool,Bool,Bool} end +@test_throws ErrorException discount(A()) + +@test POMDPs.strip_arg(:a) == :a +@test POMDPs.strip_arg(parse("a::Int")) == :a +kw_expr = Expr(:kw, parse("a::Int"), false, Any) +@test POMDPs.strip_arg(kw_expr) == :a + +POMDPs.@pomdp_func testfunc(a, b::Int, c::Bool=false) +@test_throws ErrorException testfunc(1,2) From f3236cfabc61cfdaecbf890d651536a90dec3224 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Tue, 5 Apr 2016 18:08:50 -0700 Subject: [PATCH 15/30] updated solver to use @pomdp_func --- src/solver.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/solver.jl b/src/solver.jl index 799c628f..59cdf970 100644 --- a/src/solver.jl +++ b/src/solver.jl @@ -1,5 +1,4 @@ - abstract Solver{S,A,O} -create_policy{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}) = error("$(typeof(pomdp)) does not implement create_policy") -solve{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) = error("$(typeof(solver)) does not implement solve for model $(typeof(pomdp))") +@pomdp_func create_policy{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}) +@pomdp_func solve{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) From 30bb5275e5e8bc61212043d43fac2b1b8f476cfc Mon Sep 17 00:00:00 2001 From: Maxim Egorov Date: Wed, 6 Apr 2016 08:13:36 -0700 Subject: [PATCH 16/30] added doc files --- README.md | 140 +------------------- docs/make.jl | 12 ++ docs/mkdocs.yml | 32 +++++ docs/src/api.md | 110 
++++++++++++++++ docs/src/guide.md | 78 ++++++++++++ docs/src/index.md | 29 +++++ src/POMDPs.jl | 2 +- src/belief.jl | 40 +++++- src/constants.jl | 12 +- src/distribution.jl | 27 +++- src/docs.jl | 303 ++++++++++++-------------------------------- src/io.jl | 2 +- src/policy.jl | 48 ++++++- src/pomdp.jl | 81 +++++++++++- src/simulator.jl | 10 +- src/solver.jl | 14 ++ src/space.jl | 74 +++++++++-- src/utils.jl | 11 +- 18 files changed, 637 insertions(+), 388 deletions(-) create mode 100644 docs/make.jl create mode 100644 docs/mkdocs.yml create mode 100644 docs/src/api.md create mode 100644 docs/src/guide.md create mode 100644 docs/src/index.md diff --git a/README.md b/README.md index 96cffda3..656fa506 100644 --- a/README.md +++ b/README.md @@ -45,142 +45,6 @@ The following tutorials aim to get you up to speed with POMDPs.jl: of using SARSOP and QMDP to solve the tiger problem -## Core Interface +## Documentation -The core interface provides tools to express problems, program solvers, and setup simulations. - -**TODO** this list is not complete! there are some functions in src missing documentation that were not included here - - -### Distributions - -`AbstractDistribution` - Base type for a probability distribution - -- `rand(rng::AbstractRNG, d::AbstractDistribution, sample::Any)` fill with random sample from distribution and return the sample -- `pdf(d::AbstractDistribution, x)` value of probability distribution function at x - -**XXX** There are functions missing from this list that are included in `src/distribution.jl` - -### Problem Model - -`POMDP` - Base type for a problem definition
-`AbstractSpace` - Base type for state, action, and observation spaces
-`State` - Base type for states
-`Action` - Base type for actions
-`Observation` - Base type for observations - -- `states(pomdp::POMDP)` returns the complete state space -- `states(pomdp::POMDP, state::State, sts::AbstractSpace=states(pomdp))` modifies `sts` to the state space accessible from the given state and returns it -- `actions(pomdp::POMDP)` returns the complete action space -- `actions(pomdp::POMDP, state::State, aspace::AbstractSpace=actions(pomdp))` modifies `aspace` to the action space accessible from the given state and returns it -- `actions(pomdp::POMDP, belief::Belief, aspace::AbstractSpace=actions(pomdp))` modifies `aspace` to the action space accessible from the states with nonzero belief and returns it -- `observations(pomdp::POMDP)` returns the complete observation space -- `observations(pomdp::POMDP, state::State, ospace::AbstractSpace)` modifies `ospace` to the observation space accessible from the given state and returns it -- `reward(pomdp::POMDP, state::State, action::Action, statep::State)` returns the immediate reward for the s-a-s' triple -- `transition(pomdp::POMDP, state::State, action::Action, distribution=create_transition_distribution(pomdp))` modifies `distribution` to the transition distribution from the current state-action pair and returns it -- `observation(pomdp::POMDP, state::State, action::Action, statep::State, distribution=create_observation_distribution(pomdp))` modifies `distribution` to the observation distribution for the s-a-s' tuple (state, action, and next state) and returns it -- `observation(pomdp::POMDP, state::State, action::Action, distribution=create_observation_distribution(pomdp))` modifies `distribution` to the observation distribution for the s-a pair (state and action) and returns it -- `discount(pomdp::POMDP)` returns the discount factor -- `isterminal(pomdp::POMDP, state::State)` checks if a state is terminal -- `isterminal(pomdp::POMDP, observation::Observation)` checks if an observation is terminal. A terminal observation should be generated only upon transition to a terminal state. - -**XXX** Missing functions such as `n_states`, `n_actions` (see `src/pomdp.jl`) - -### Solvers and Polices - -`Solver` - Base type a solver
-`Policy` - Base type for a policy (a map from every possible belief, or more abstract policy state, to an optimal or suboptimal action) - -- `solve(solver::Solver, pomdp::POMDP, policy::Policy=create_policy(solver, pomdp))` solves the POMDP and modifies `policy` to be the solution of `pomdp` and returns it -- `action(policy::Policy, belief::Belief)` or `action(policy::Policy, belief::Belief, act::Action)` returns an action for the current belief given the policy (the method with three arguments modifies `act` and returns it) -- `action(policy::Policy, state::State)` or `action(policy::Policy, state::State, act::Action)` returns an action for the current state given the policy - -### Belief - -`Belief` - Base type for an object representing some knowledge about the state (often a probability distribution)
-`BeliefUpdater` - Base type for an object that defines how a belief should be updated - -- `update(updater::BeliefUpdater, belief_old::Belief, action::Action, obs::Observation, belief_new::Belief=create_belief(updater))` modifies `belief_new` to the belief given the old belief (`belief_old`) and the latest action and observation and returns the updated belief. - -### Simulation - -`Simulator` - Base type for an object defining how a simulation should be carried out - -- `simulate(simulator::Simulator, pomdp::POMDP, policy::Policy, updater::BeliefUpdater, initial_belief::Belief)` runs a simulation using the specified policy and returns the accumulated reward - -## Minor Components - -### Convenience Functions - -Several convenience functions are also provided in the interface to provide standard vocabulary for common tasks and may be used by some solvers or in simulation, but they are not strictly necessary for expressing problems. - -- `index(pomdp::POMDP, state::State)` returns the index of the given state for a discrete POMDP -- `initial_belief(pomdp::POMDP)` returns an example initial belief for the pomdp -- `iterator(space::AbstractSpace)` returns an iterator over a space or an iterable object containing the space (such as an array) -- `dimensions(s::AbstractSpace)` returns the number (integer) of dimensions in a space -- `lowerbound(s::AbstractSpace, i::Int)` returns the lower bound of dimension `i` -- `upperbound(s::AbstractSpace, i::Int)` returns the upper bound of dimension `i` -- `rand(rng::AbstractRNG, d::AbstractSpace, sample::Any)` fill with random sample from space and return the sample -- `value(policy::Policy, belief::Belief)` returns the utility value from policy p given the belief -- `value(policy::Policy, state::State)` returns the utility value from policy p given the state -- `convert_belief(updater::BeliefUpdater, b::Belief)` returns a belief that can be updated using `updater` that has a similar distribution to `b` (this conversion may be lossy) -- `updater(p::Policy)` returns a default BeliefUpdater appropriate for a belief type that policy `p` can use - -### Object Creators - -In many cases, it is more efficient to fill pre-allocated objects with new data rather than create new objects at each iteration of an algorithm or simulation. When a new object is needed, the following functions may be called. They should return an object of the appropriate type as efficiently as possible. The data in the object does not matter - it will be overwritten when the object is used. - -- `create_state(pomdp::POMDP)` creates a single state object (for preallocation purposes) -- `create_observation(pomdp::POMDP)` creates a single observation object (for preallocation purposes) -- `create_transition_distribution(pomdp::POMDP)` returns a transition distribution -- `create_observation_distribution(pomdp::POMDP)` returns an observation distribution -- `create_policy(solver::Solver, pomdp::POMDP)` creates a policy object (for preallocation purposes) -- `create_action(pomdp::POMDP)` creates an action object (for preallocation purposes) -- `create_belief(updater::BeliefUpdater)` creates a belief object of the type used by `updater` (for preallocation purposes) -- `create_belief(pomdp::POMDP)` creates an empty problem-native belief object (for preallocation purposes) - - -## Reference Simulation Implementation - -This reference simulation implementation shows how the various functions will be used. 
Please note that this example is written for clarity and not efficiency (see [TODO: link to main doc] for efficiency tips). - -```julia -type ReferenceSimulator - rng::AbstractRNG - max_steps -end - -function simulate(simulator::ReferenceSimulator, pomdp::POMDP, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) - - s = create_state(pomdp) - o = create_observation(pomdp) - rand(sim.rng, initial_belief, s) - - b = convert_belief(updater, initial_belief) - - step = 1 - disc = 1.0 - r = 0.0 - - while step <= sim.max_steps && !isterminal(pomdp, s) - a = action(policy, b) - - sp = create_state(pomdp) - trans_dist = transition(pomdp, s, a) - rand(sim.rng, trans_dist, sp) - - r += disc*reward(pomdp, s, a, sp) - - obs_dist = observation(pomdp, s, a, sp) - rand(sim.rng, obs_dist, o) - - b = update(updater, b, a, o) - - s = sp - disc *= discount(pomdp) - step += 1 - end - -end - -``` +Detailed documentation can be found [here](http://juliapomdp.github.io/POMDPs.jl/latest/). diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 00000000..b15725ae --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,12 @@ +using Documenter, POMDPs + +makedocs( + # options + modules = [POMDPs] +) + +deploydocs( + repo = "github.com/JuliaPOMDP/POMDPs.jl.git", + julia = "release", + osname = "linux" +) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml new file mode 100644 index 00000000..9ae0ffd9 --- /dev/null +++ b/docs/mkdocs.yml @@ -0,0 +1,32 @@ +site_name: POMDPs.jl +repo_url: https://github.com/JuliaPOMDP/POMDPs.jl +site_description: API for solving partially observable Markov decision processes in Julia. +site_author: Maxim Egorov + +theme: readthedocs + +extra: + palette: + primary: 'indigo' + accent: 'blue' + +extra_css: + - assets/Documenter.css + +markdown_extensions: + - codehilite + - extra + - tables + - fenced_code + +extra_javascript: + - https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML + - assets/mathjaxhelper.js + +docs_dir: 'build' + +pages: +- Home: index.md +- Manual: guide.md +- API: api.md + diff --git a/docs/src/api.md b/docs/src/api.md new file mode 100644 index 00000000..d58c79a8 --- /dev/null +++ b/docs/src/api.md @@ -0,0 +1,110 @@ +# Solver Documentation + +Documentation for the `POMDPs.jl` user interface. You can get help for any type or +function in the module by typing `?` in the Julia REPL followed by the name of +type or function. For example: + +```julia +julia>using POMDPs +julia>? 
+help?>reward +search: reward + + reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) + + Returns the immediate reward for the s-a-s triple + + reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A) + + Returns the immediate reward for the s-a pair + +``` + + {meta} + CurrentModule = POMDPs + +## Contents + + {contents} + Pages = ["api.md"] + +## Index + + {index} + Pages = ["api.md"] + + +## Types + + {docs} + POMDP + MDP + AbstractSpace + AbstractDistribution + Solver + Policy + Belief + BeliefUpdater + +## Model Functions + + {docs} + states + actions + observations + reward + transition + observation + isterminal + isterminal_obs + n_states + n_actions + n_observations + state_index + action_index + obs_index + discount + +## Distribution/Space Functions + + {docs} + rand + pdf + dimensions + iterator + create_transition_distribution + create_observation_distribution + +## Belief Functions + + {docs} + initial_belief + create_belief + update + convert_belief + +## Policy and Solver Functions + + {docs} + create_policy + solve + updater + action + value + +## Simulator + + {docs} + Simulator + simulate + +## Utility Tools + + {docs} + add + +## Constants + + {docs} + REMOTE_URL + SUPPORTED_SOLVERS diff --git a/docs/src/guide.md b/docs/src/guide.md new file mode 100644 index 00000000..765811fa --- /dev/null +++ b/docs/src/guide.md @@ -0,0 +1,78 @@ +# Package Guide + +## Installation + +The package can be installed by cloning the code from the github repository +[POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) + +Installation with POMDPs.jl: +```julia +Pkg.clone("https://github.com/JuliaPOMDP/POMDPs.jl.git") +``` + +The package is currently not registered in meta-data. + +## Usage + +POMDPs serves as the interface used by a number of packages under the [JuliaPOMDP]() framework. It is essentially the +agreed upon API used by all the other packages in JuliaPOMDP. If you are using this framework, you may be trying to +accomplish one or more of the following three goals: + +- Solve a decision or planning problem with stochastic dynamics (MDP) or partial observability (POMDP) +- Evaluate a solution in simulation +- Test your custom algorithm for solving MDPs or POMDPs against other state-of-the-art algorithms + +If you are attempting to complete the first two goals, take a look at these Jupyer Notebook tutorials: + +* [MDP Tutorial](http://nbviewer.ipython.org/github/sisl/POMDPs.jl/blob/master/examples/GridWorld.ipynb) for beginners gives an overview of using Value Iteration and Monte-Carlo Tree Search with the classic grid world problem +* [POMDP Tutorial](http://nbviewer.ipython.org/github/sisl/POMDPs.jl/blob/master/examples/Tiger.ipynb) gives an overview of using SARSOP and QMDP to solve the tiger problem + +If you are trying to write your own algorithm for solving MDPs or POMDPs with this interface take a look at the API +section of this guide. + +## Example Simulation Implementation + + +This reference simulation implementation shows how the various functions will be used. Please note that this example is +written for clarity and not efficiency. 
+ +```julia +type ReferenceSimulator + rng::AbstractRNG + max_steps +end + +function simulate(simulator::ReferenceSimulator, pomdp::POMDP, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) + + s = create_state(pomdp) + o = create_observation(pomdp) + rand(sim.rng, initial_belief, s) + + b = convert_belief(updater, initial_belief) + + step = 1 + disc = 1.0 + r = 0.0 + + while step <= sim.max_steps && !isterminal(pomdp, s) + a = action(policy, b) + + sp = create_state(pomdp) + trans_dist = transition(pomdp, s, a) + rand(sim.rng, trans_dist, sp) + + r += disc*reward(pomdp, s, a, sp) + + obs_dist = observation(pomdp, s, a, sp) + rand(sim.rng, obs_dist, o) + + b = update(updater, b, a, o) + + s = sp + disc *= discount(pomdp) + step += 1 + end + +end + +``` diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 00000000..72f88bf7 --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,29 @@ +# POMDPs +*A Julia interface for defining, solving and simulating partially observable Markov decision processes and their fully +observable counterparts.* + +## Package Features + +- General interface that can handle problems with discrete and continuous state/action/observation spaces +- A number of popular state-of-the-art solvers availiable to use out of the box +- Tools that make it easy to define problems and simulate solutions +- Simple integration of custom solvers into the existing interface + +## Availible Solvers + +The following MDP solvers support this interface: + +- [Value Iteration](https://github.com/JuliaPOMDP/DiscreteValueIteration.jl) +- [Monte Carlo Tree Search](https://github.com/JuliaPOMDP/MCTS.jl) + +The following POMDP solvers support this interface: + +- [QMDP](https://github.com/JuliaPOMDP/QMDP.jl) +- [SARSOP](https://github.com/JuliaPOMDP/SARSOP.jl) +- [POMCP](https://github.com/JuliaPOMDP/POMCP.jl) +- [POMDPSolve](https://github.com/JuliaPOMDP/POMDPSolve.jl) + +## Manual Outline + + {contents} + diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 21434071..29c373b0 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -88,7 +88,7 @@ include("space.jl") include("solver.jl") include("policy.jl") include("simulator.jl") -include("docs.jl") +#include("docs.jl") end diff --git a/src/belief.jl b/src/belief.jl index 5ae0b770..e9833f1a 100644 --- a/src/belief.jl +++ b/src/belief.jl @@ -6,20 +6,52 @@ # For discrete problems, it can be usually be represented as a vector. # For tools supportng belief updates see POMDPToolbox.jl +""" +Abstract type for an object representing some knowledge about the state (often a probability distribution) + + T: the type over which the belief is over (e.g. state) +""" abstract Belief{T} <: AbstractDistribution{T} + +""" +Abstract type for an object that defines how a belief should be updated +""" abstract BeliefUpdater{S,A,O} -# returns an example initial belief for the pomdp +""" + initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S} = create_belief(pomdp)) + +Returns an initial belief for the pomdp. 
+""" @pomdp_func initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S} = create_belief(pomdp)) -# allocates and returns an empty problem-native belief structure +""" + create_belief(pomdp::POMDP) + +Creates a belief either to be used by updater or pomdp +""" @pomdp_func create_belief{S,A,O}(pomdp::POMDP{S,A,O}) -# creates a belief object of the type used by `updater` (for preallocation purposes) +""" + create_belief{S,A,O}(updater::BeliefUpdater{S,A,O}) + +Creates a belief object of the type used by `updater` (preallocates memory) +""" @pomdp_func create_belief{S,A,O}(updater::BeliefUpdater{S,A,O}) -# updates the belief given the old belief (belief_old), the action and the observation +""" + @pomdp_func update{S,A,O}(updater::BeliefUpdater{S,A,O}, belief_old::Belief{S}, action::A, obs::O, + belief_new::Belief{S}=create_belief(updater)) + +Returns a new instance of an updated belief given `belief_old` and the latest action and observation. +""" @pomdp_func update{S,A,O}(updater::BeliefUpdater{S,A,O}, belief_old::Belief{S}, action::A, obs::O, belief_new::Belief{S}=create_belief(updater)) # returns a belief that can be updated using `updater` that has a similar distribution to `b` (this conversion may be lossy) +""" + convert_belief{S,A,O}(updater::BeliefUpdater{S,A,O}, belief::Belief{S}, + new_belief::Belief{S}=create_belief(updater)) = belief + +Returns a belief that can be updated using `updater` that has a similar distribution to `belief`. +""" @pomdp_func convert_belief{S,A,O}(updater::BeliefUpdater{S,A,O}, belief::Belief{S}, new_belief::Belief{S}=create_belief(updater)) = belief diff --git a/src/constants.jl b/src/constants.jl index d47ba65a..f1d98e07 100644 --- a/src/constants.jl +++ b/src/constants.jl @@ -1,7 +1,15 @@ -# url to remote organization repo +""" +url to remote JuliaPOMDP organization repo +""" const REMOTE_URL = "https://github.com/JuliaPOMDP/" -# supported solvers +""" +Set containing string names of officially supported solvers +(e.g. `MCTS`, `SARSOP`, etc). +If you have a validated solver that supports the POMDPs.jl API, +contact the developers to add your solver to this list. +""" +# TODO (max): would it be better to have a dict of form: string => url for solvers? const SUPPORTED_SOLVERS = Set{AbstractString}( ["DiscreteValueIteration", "MCTS", diff --git a/src/distribution.jl b/src/distribution.jl index 9eeb7d35..35279fc4 100644 --- a/src/distribution.jl +++ b/src/distribution.jl @@ -4,14 +4,39 @@ # DiscreteDistribution: discrete distributions support state indexing and length functions ################################################################# +""" + create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns a transition distribution (for memory preallocation). +""" @pomdp_func create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) + +""" + create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns an observation distribution (for memory preallocation). +""" @pomdp_func create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) + +""" + rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) + +Fill `sample` with a random element from distribution `d`. The sample can be a state, action or observation. +""" @pomdp_func rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) + +""" + pdf{T}(d::AbstractDistribution{T}, x::T) + +Value of probability distribution `d` function at sample `x`. 
+""" @pomdp_func pdf{T}(d::AbstractDistribution{T}, x::T) + +# TODO (max): does this have a purpose now that we have iterator? @pomdp_func domain{T}(d::AbstractDistribution{T}) +# TODO (max): need an explicit treamtent of discrete distributions? abstract DiscreteDistribution{T} <: AbstractDistribution{T} - @pomdp_func Base.length{T}(d::DiscreteDistribution{T}) @pomdp_func weight{T}(d::DiscreteDistribution{T}, i::Int) @pomdp_func index{S,A,O,T}(pomdp::POMDP{S,A,O}, d::DiscreteDistribution{T}, i::Int) diff --git a/src/docs.jl b/src/docs.jl index 2dd612cc..39351e4f 100644 --- a/src/docs.jl +++ b/src/docs.jl @@ -7,20 +7,17 @@ POMDPs ####################### Problem Model ########################### ################################################################# -""" -Base type for a POMDP model, defined by the user -""" -POMDP - """ -Base type for state, action and observation spaces +Base type for state, action and observation spaces. + + T: type that parametarizes the space (state, action, or observation) """ AbstractSpace """ - states(pomdp::POMDP) + states{S,A,O}(pomdp::POMDP{S,A,O}) Returns the complete state space of a POMDP. """ @@ -28,345 +25,201 @@ states """ - actions(pomdp::POMDP) + actions{S,A,O}(pomdp::POMDP{S,A,O}) Returns the entire action space of a POMDP. """ -actions(pomdp::POMDP) +actions{S,A,O}(pomdp::POMDP{S,A,O}) """ - actions(pomdp::POMDP, state::State, aspace::AbstractSpace) + actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, aspace::AbstractSpace{A}) Modifies aspace to the action space accessible from the given state and returns it. """ -actions(pomdp::POMDP, state::State, aspace::AbstractSpace) +actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, aspace::AbstractSpace{A}) """ - actions(pomdp::POMDP, belief::Belief, aspace::AbstractSpace) + actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, aspace::AbstractSpace{A}) Modifies aspace to the action space accessible from the states with nonzero belief and returns it. """ -actions(pomdp::POMDP, belief::Belief, aspace::AbstractSpace) +actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, aspace::AbstractSpace{A}) """ - observations(pomdp::POMDP) + observations{S,A,O}(pomdp::POMDP{S,A,O}) Returns the entire observation space. """ -observations(pomdp::POMDP) - +observations{S,A,O}(pomdp::POMDP{S,A,O}) """ - observations(pomdp::POMDP, state::State, ospace::AbstractSpace) + observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) Modifies ospace to the observation space accessible from the given state and returns it. 
""" -observations(pomdp::POMDP, state::State, ospace::AbstractSpace) +observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) """ - reward(pomdp::POMDP, state::State, action::Action, statep::State) + reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) Returns the immediate reward for the s-a-s' triple """ -reward +reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) """ - transition(pomdp::POMDP, state::State, action::Action) + reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A) -Returns the transition distribution from the current state-action pair +Returns the immediate reward for the s-a pair """ -transition(pomdp::POMDP, state::State, action::Action) +reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A) + """ - transition(pomdp::POMDP, state::State, action::Action, distribution::AbstractDistribution) + transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, +distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) -Modifies distribution to the transition distribution from the current state-action pair and returns it +Returns the transition distribution from the current state-action pair """ -transition(pomdp::POMDP, state::State, action::Action, distribution::AbstractDistribution) +transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, +distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) """ - observation(pomdp::POMDP, state::State, action::Action, statep::State) + observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) Returns the observation distribution for the s-a-s' tuple (state, action, and next state) """ -observation(pomdp::POMDP, state::State, action::Action, statep::State) - +observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) """ - observation(pomdp::POMDP, state::State, action::Action, statep::State, distribution::AbstractDistribution) + observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) Modifies distribution to the observation distribution for the s-a-s' tuple (state, action, and next state) and returns it """ -observation(pomdp::POMDP, state::State, action::Action, statep::State, distribution::AbstractDistribution) - +observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) -""" - discount(pomdp::POMDP) -Returns the discount factor """ -discount - - -""" - isterminal(pomdp::POMDP, s::State) + isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) Checks if state s is terminal """ -isterminal - - - -################################################################# -####################### Distributions ########################### -################################################################# - -""" -Base type for a probability distribution -""" -AbstractDistribution - -""" - rand(rng::AbstractRNG, d::AbstractDistribution, sample) - -Fill sample with a random element from distribution d. The sample can be a state, action or observation. -""" -rand(rng::AbstractRNG, d::AbstractDistribution, sample) +isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) -""" - rand(rng::AbstractRNG, s::AbstractSpace, sample) - -Fill sample with a random element from space s. 
The sample can be a state, action or observation. -""" -rand(rng::AbstractRNG, s::AbstractSpace, sample) """ - pdf(d::AbstractDistribution, x) - -Value of probability distribution function at x + isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) +Checks if an observation is terminal. """ -pdf - +isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) - -################################################################# -##################### Solvers and Policies ###################### -################################################################# - """ -Base type for an MDP/POMDP solver -""" -Solver + n_states{S,A,O}(pomdp::POMDP{S,A,O}) +Returns the number of states in `pomdp`. Used for discrete models only. """ -Base type for a policy (a map from every possible belief, or more abstract policy state, to an optimal or suboptimal action) -""" -Policy +n_states{S,A,O}(pomdp::POMDP{S,A,O}) """ - solve(solver::Solver, pomdp::POMDP) + n_actions{S,A,O}(pomdp::POMDP{S,A,O}) -Solves the POMDP using method associated with solver, and returns a policy. +Returns the number of actions in `pomdp`. Used for discrete models only. """ -solve(solver::Solver, pomdp::POMDP) +n_actions{S,A,O}(pomdp::POMDP{S,A,O}) """ - solve(solver::Solver, pomdp::POMDP, policy::Policy) + n_observations{S,A,O}(pomdp::POMDP{S,A,O}) -Solves the POMDP and modifies policy to be the solution of pomdp and returns it +Returns the number of actions in `pomdp`. Used for discrete models only. """ -solve(solver::Solver, pomdp::POMDP, policy::Policy) - - -""" - action(policy::Policy, belief::Belief) - -Returns an action for the current belief, given the policy - -""" -action(policy::Policy, belief::Belief) - -""" - action(policy::Policy, belief::Belief, a::Action) - -Fills and returns action a for the current belief, given the policy -""" -action(policy::Policy, belief::Belief, a::Action) +n_observations{S,A,O}(pomdp::POMDP{S,A,O}) """ - action(policy::Policy, s::State) - -Returns an action for the current state, given the policy + state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S) +Returns the integer index of state `s`. Used for discrete models only. """ -action(policy::Policy, s::State) +state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S) """ - action(policy::Policy, s::State, a::Action) + action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A) -Fills and returns action a for the current state, given the policy +Returns the integer index of action `a`. Used for discrete models only. """ -action(policy::Policy, s::State, a::Action) +action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A) """ - value(policy::Policy, s::State) + obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) -Returns the utility value from a given state +Returns the integer index of observation `o`. Used for discrete models only. """ -value(policy::Policy, s::State) - -""" - value(policy::Policy, b::Belief) - -Returns the utility value from a given belief -""" -value(policy::Policy, b::Belief) - - +obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) ################################################################# -############################ Belief ############################# +####################### Distributions ########################### ################################################################# """ -Base type for an object representing some knowledge about the state (often a probability distribution) -""" -Belief +Abstract type for a probability distribution. 
+ T: type over which distribution is over (state, action, or observation) """ -Base type for an object that defines how a belief should be updated -""" -BeliefUpdater +AbstractDistribution """ - update(updater::BeliefUpdater, belief_old::Belief, action::Action, obs::Observation) + rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) -Returns a new instance of an updated belief given the old belief (belief_old) and the latest action and observation +Fill sample with a random element from distribution d. The sample can be a state, action or observation. """ -update(updater::BeliefUpdater, belief_old::Belief, action::Action, obs::Observation) +rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) -""" - update(updater::BeliefUpdater, belief_old::Belief, action::Action, obs::Observation, belief_new::Belief) -Modifies belief_new to the belief given the old belief (belief_old) and the latest action and observation and returns -the updated belief """ -update(updater::BeliefUpdater, belief_old::Belief, action::Action, obs::Observation, belief_new::Belief) + rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, state::T) +Fill sample with a random element from space d. The sample can be a state, action or observation. """ - initial_belief(pomdp::POMDP) +rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, state::T) -Returns an initial belief for the pomdp """ -initial_belief + pdf{T}(d::AbstractDistribution{T}, x::T) +Value of probability distribution function at x """ - convert_belief(updater::BeliefUpdater, b::Belief) +pdf{T}(d::AbstractDistribution{T}, x::T) -Returns a belief that can be updated using updater that has a similar distribution to b -""" -convert_belief """ - updater(p::Policy) -Returns a default BeliefUpdater appropriate for the passed in policy -""" -updater - -################################################################# -############################ Simulation ######################### -################################################################# + create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) +Creates a transition distribution for model `pomdp`. This +could be a custom type, array, or any other sensible container. +The transition distirubtion is over states. """ -Base type for an object defining how a simulation should be carried out -""" -Simulator +create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) """ - simulate(simulator::Simulator, pomdp::POMDP, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) + create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) -Runs a simulation using the specified policy and returns the accumulated reward +Creates an observation distribution for model `pomdp`. This +could be a custom type, array, or any other sensible container. +The observation distirubtion is over observations. 
""" -simulate - +create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) ################################################################# -######################### Convenience ########################### -################################################################# - -""" - index(pomdp::POMDP, state::State) - -Returns the index of the given state for a discrete POMDP -""" -index(pomdp::POMDP, s::State) - -""" - iterator(space::AbstractSpace) - -Returns an iterator over a space -""" -iterator(space::AbstractSpace) - - - -################################################################# -############################ Creators ########################### +##################### Solvers and Policies ###################### ################################################################# """ - create_state(pomdp::POMDP) - -Creates a single state object (for preallocation purposes) -""" -create_state(pomdp::POMDP) - -""" - create_observation(pomdp::POMDP) - -Creates a single observation object (for preallocation purposes) -""" -create_observation(pomdp::POMDP) - -""" - create_transition_distribution(pomdp::POMDP) - -Returns a transition distribution -""" -create_transition_distribution(pomdp::POMDP) - -""" - create_observation_distribution(pomdp::POMDP) - -Returns an observation distribution -""" -create_observation_distribution(pomdp::POMDP) - -""" - create_policy(solver::Solver, pomdp::POMDP) - -Creates a policy object (for preallocation purposes) -""" -create_policy(solver::Solver, pomdp::POMDP) - -""" - create_action(pomdp::POMDP) - -Creates an action object (for preallocation purposes) -""" -create_action(pomdp::POMDP) - +Base type for an MDP/POMDP solver """ - create_belief(pomdp::POMDP) +Solver -Creates a belief either to be used by updater or pomdp -""" -create_belief(pomdp::POMDP) diff --git a/src/io.jl b/src/io.jl index 0bf46a51..cf627c2a 100644 --- a/src/io.jl +++ b/src/io.jl @@ -1,2 +1,2 @@ -# TODO (max): how to best handle io: HDF5? or something else +# TODO (max): do we need a unified API for io? diff --git a/src/policy.jl b/src/policy.jl index 4c8d7e36..8dadf11a 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -4,21 +4,61 @@ # The policy is extracted through calls to the action() function. 
################################################################# +""" +Base type for a policy (a map from every possible belief, or more abstract policy state, to an optimal or suboptimal action) +""" abstract Policy{S,A,O} # creates an action object (for preallocation purposes) #@pomdp_func create_action{S,A,O}(pomdp::POMDP{S,A,O}) -# returns a default BeliefUpdater appropriate for a belief type that policy `p` can use +""" + action{S,A,O}(p::Policy{S,A,O}, s::S, a::A) + +Fills and returns action a for the current state, given the policy +""" @pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S, a::A) + +""" + action(policy::Policy, s::State) + +Returns an action for the current state, given the policy + +""" @pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S) + +""" + action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}, a::A) + +Fills and returns action a for the current belief, given the policy +""" @pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}, a::A) + +""" + action{S,A,O}(policy::Policy{S,A,O}, belief::Belief{S}) + +Returns an action for the current belief, given the policy + +""" @pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}) -# returns a default BeliefUpdater appropriate for a belief type that policy `p` can use +""" + updater{S,A,O}(policy::Policy{S,A,O}) + +Returns a default BeliefUpdater appropriate for a belief type that policy `p` can use +""" @pomdp_func updater{S,A,O}(policy::Policy{S,A,O}) -# returns the utility value from policy p given the belief +""" + value{S,A,O}(p::Policy{S,A,O}, belief::Belief{S}) + +Returns the utility value from policy p given the belief +""" @pomdp_func value{S,A,O}(p::Policy{S,A,O}, belief::Belief{S}) -# returns the utility value from policy p given the state + +""" + value{S,A,O}(p::Policy{S,A,O}, state::S) + +Returns the utility value from policy p given the state +""" @pomdp_func value{S,A,O}(p::Policy{S,A,O}, state::S) diff --git a/src/pomdp.jl b/src/pomdp.jl index 75b4c52c..0adf3eaf 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -16,11 +16,32 @@ Abstract base type for a fully observable Markov decision process. """ abstract MDP{S,A} <: POMDP{S,A,S} +""" +Abstract type for a probability distribution. + + T: type over which distribution is over (state, action, or observation) +""" abstract AbstractDistribution{T} -# return the space sizes +""" + n_states{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns the number of states in `pomdp`. Used for discrete models only. +""" @pomdp_func n_states{S,A,O}(pomdp::POMDP{S,A,O}) + +""" + n_actions{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns the number of actions in `pomdp`. Used for discrete models only. +""" @pomdp_func n_actions{S,A,O}(pomdp::POMDP{S,A,O}) + +""" + n_observations{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns the number of actions in `pomdp`. Used for discrete models only. +""" @pomdp_func n_observations{S,A,O}(pomdp::POMDP{S,A,O}) """ @@ -30,20 +51,78 @@ Return the discount factor for the problem. 
""" @pomdp_func discount{S,A,O}(pomdp::POMDP{S,A,O}) +""" + transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, +distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) + +Returns the transition distribution from the current state-action pair +""" @pomdp_func transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) + +""" + observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + +Returns the observation distribution for the s-a-s' tuple (state, action, and next state) +""" @pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + +""" + observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + +Modifies distribution to the observation distribution for the s-a-s' tuple (state, action, and next state) and returns it +""" @pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + +""" + reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) + +Returns the immediate reward for the s-a-s' triple +""" @pomdp_func reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) +""" + reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A) + +Returns the immediate reward for the s-a pair +""" +@pomdp_func reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A) + #@pomdp_func create_state{S,A,O}(pomdp::POMDP{S,A,O}) #@pomdp_func create_observation{S,A,O}(pomdp::POMDP{S,A,O}) +""" + isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) + +Checks if state s is terminal +""" @pomdp_func isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) = false + +""" + isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) +Checks if an observation is terminal. +""" @pomdp_func isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) = false # @pomdp_func isterminal(pomdp::POMDP, observation::Any) = false # @pomdp_func isterminal_obs(pomdp::POMDP, state::Any) = false +""" + state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S) + +Returns the integer index of state `s`. Used for discrete models only. +""" @pomdp_func state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S) + +""" + action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A) + +Returns the integer index of action `a`. Used for discrete models only. +""" @pomdp_func action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A) + +""" + obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) + +Returns the integer index of observation `o`. Used for discrete models only. 
+""" @pomdp_func obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) diff --git a/src/simulator.jl b/src/simulator.jl index ed9d31a1..a6802135 100644 --- a/src/simulator.jl +++ b/src/simulator.jl @@ -4,8 +4,14 @@ # creating Simulator types ################################################################# -# Base type for an object defining how a simulation should be carried out +""" +Base type for an object defining how a simulation should be carried out +""" abstract Simulator{S,A,O} -# runs a simulation using the specified policy and returns the accumulated reward +""" + simulate{S,A,O}(simulator::Simulator{S,A,O}, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) + +Runs a simulation using the specified policy and returns the accumulated reward +""" @pomdp_func simulate{S,A,O}(simulator::Simulator{S,A,O}, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) diff --git a/src/solver.jl b/src/solver.jl index 799c628f..6157f967 100644 --- a/src/solver.jl +++ b/src/solver.jl @@ -1,5 +1,19 @@ +""" +Base type for an MDP/POMDP solver +""" abstract Solver{S,A,O} +""" + create_policy{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}) + +Creates a policy object (for preallocation purposes) +""" create_policy{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}) = error("$(typeof(pomdp)) does not implement create_policy") + +""" + solve{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) + +Solves the POMDP using method associated with solver, and returns a policy. +""" solve{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) = error("$(typeof(solver)) does not implement solve for model $(typeof(pomdp))") diff --git a/src/space.jl b/src/space.jl index 40869305..834c0c61 100644 --- a/src/space.jl +++ b/src/space.jl @@ -3,24 +3,82 @@ # AbstractSpace: the abstract super type for the state, action and observation spaces ################################################################# +""" +Base type for state, action and observation spaces. + + T: type that parametarizes the space (state, action, or observation) +""" abstract AbstractSpace{T} -# returns an integer +""" + dimensions{T}(s::AbstractSpace{T}) + +Returns the number of dimensions in space `s`. +""" @pomdp_func dimensions{T}(s::AbstractSpace{T}) -# returns bound of dim i + +""" + rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, sample::T) + +Returns a random `sample` from space `s`. +""" +@pomdp_func rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, sample::T) + +""" + iterator{T}(s::AbstractSpace{T}) + +Returns an iterable type (array or custom iterator) corresponding to space `s`. +""" +@pomdp_func iterator{T}(s::AbstractSpace{T}) + @pomdp_func lowerbound{T}(s::AbstractSpace{T}, i::Int) -# returns bound of dim i @pomdp_func upperbound{T}(s::AbstractSpace{T}, i::Int) -# sample a space and return the sample -@pomdp_func rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, state::T) -# return an iterable object corresponding to the space -@pomdp_func iterator{T}(s::AbstractSpace{T}) -# return a space type +""" + states{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns the complete state space of a POMDP. +""" @pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}) + +""" + states{S,A,O}(pomdp::POMDP{S,A,O}, state::S) + +Returns a subset of the state space reachable from `state`. 
+""" @pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}, state::S, sts::AbstractSpace{S}=states(pomdp)) + +""" + actions{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns the entire action space of a POMDP. +""" @pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}) + +""" + actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, aspace::AbstractSpace{A}) + +Modifies aspace to the action space accessible from the given state and returns it. +""" @pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, acts::AbstractSpace{A}=actions(pomdp)) + +""" + actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, aspace::AbstractSpace{A}) + +Modifies aspace to the action space accessible from the states with nonzero belief and returns it. +""" @pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, acts::AbstractSpace{A}=actions(pomdp)) + +""" + observations{S,A,O}(pomdp::POMDP{S,A,O}) + +Returns the entire observation space. +""" @pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}) + +""" + observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) + +Modifies ospace to the observation space accessible from the given state and returns it. +""" @pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) diff --git a/src/utils.jl b/src/utils.jl index 045f2c02..b4f61479 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,4 +1,13 @@ -# adds a registered solver +""" + add(solver_name::AbstractString) + +Downloads and installs a registered solver with name `solver_name`. +This function is not exported, and must be called: +```julia +julia> using POMDPs +julia> POMDPs.add("MCTS") +``` +""" function add(solver_name::AbstractString) @assert solver_name in SUPPORTED_SOLVERS string("The solver: ", solver_name, " is not supported") full_url = string(REMOTE_URL, solver_name, ".jl") From 9243bb1f2c23a7fc619ed175989ffdeae168d183 Mon Sep 17 00:00:00 2001 From: Maxim Egorov Date: Wed, 6 Apr 2016 09:23:54 -0700 Subject: [PATCH 17/30] Update .travis.yml added documenter script to travis --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index ee8cecd1..acc64843 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,11 @@ julia: - release notifications: email: false +before_script: + - export PATH=$HOME/.local/bin:$PATH script: - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.test("POMDPs")' +after_success: + - julia -e 'Pkg.clone("https://github.com/MichaelHatherly/Documenter.jl")' + - julia -e 'cd(Pkg.dir("PACKAGE_NAME")); include(joinpath("docs", "make.jl"))' From dea7f3ea544d375503d07ebe0303dc26e5d72f37 Mon Sep 17 00:00:00 2001 From: Maxim Egorov Date: Wed, 6 Apr 2016 09:25:21 -0700 Subject: [PATCH 18/30] added gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..bbcba11f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +docs/build/ +docs/site/ From 96b4755c1d086901fcfc922ddf1d36f13d95c2de Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Wed, 6 Apr 2016 10:18:44 -0700 Subject: [PATCH 19/30] removed unnecessary documentation --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index d45245be..04c9f4d3 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,6 @@ using POMDPs # the following command adds the SARSOP solver, you can add any supported solver this way POMDPs.add("SARSOP") ``` -## Documentation - 
-TODO: Link to documentation ## Tutorials From ee33ca895e6b5c1068014746ea096b557b515f48 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 8 Apr 2016 13:34:41 -0700 Subject: [PATCH 20/30] changed the observation arg order to a-s'. (see #25) --- src/pomdp.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pomdp.jl b/src/pomdp.jl index 21ff9a00..9a70aa49 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -67,11 +67,11 @@ Returns the observation distribution for the s-a-s' tuple (state, action, and ne @pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) """ - observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) -Modifies distribution to the observation distribution for the s-a-s' tuple (state, action, and next state) and returns it +Modifies distribution to the observation distribution for the a-s' tuple (action and next state) and returns it """ -@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) +@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) """ reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) @@ -99,6 +99,7 @@ Checks if state s is terminal """ isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) + Checks if an observation is terminal. """ @pomdp_func isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) = false From 824ce094f1cdcefc6e0bad3bf746147dacf315a3 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 8 Apr 2016 14:34:17 -0700 Subject: [PATCH 21/30] got rid of docs.jl --- src/docs.jl | 225 ---------------------------------------------------- 1 file changed, 225 deletions(-) delete mode 100644 src/docs.jl diff --git a/src/docs.jl b/src/docs.jl deleted file mode 100644 index 39351e4f..00000000 --- a/src/docs.jl +++ /dev/null @@ -1,225 +0,0 @@ -""" -Provides a basic interface for working with MDPs/POMDPs -""" -POMDPs - -################################################################# -####################### Problem Model ########################### -################################################################# - - -""" -Base type for state, action and observation spaces. - - T: type that parametarizes the space (state, action, or observation) -""" -AbstractSpace - - -""" - states{S,A,O}(pomdp::POMDP{S,A,O}) - -Returns the complete state space of a POMDP. -""" -states - - -""" - actions{S,A,O}(pomdp::POMDP{S,A,O}) - -Returns the entire action space of a POMDP. -""" -actions{S,A,O}(pomdp::POMDP{S,A,O}) - - -""" - actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, aspace::AbstractSpace{A}) - -Modifies aspace to the action space accessible from the given state and returns it. -""" -actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, aspace::AbstractSpace{A}) - - -""" - actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, aspace::AbstractSpace{A}) - -Modifies aspace to the action space accessible from the states with nonzero belief and returns it. 
-""" -actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, aspace::AbstractSpace{A}) - - -""" - observations{S,A,O}(pomdp::POMDP{S,A,O}) - -Returns the entire observation space. -""" -observations{S,A,O}(pomdp::POMDP{S,A,O}) - -""" - observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) - -Modifies ospace to the observation space accessible from the given state and returns it. -""" -observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) - - -""" - reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) - -Returns the immediate reward for the s-a-s' triple -""" -reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) - - -""" - reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A) - -Returns the immediate reward for the s-a pair -""" -reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A) - - -""" - transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, -distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) - -Returns the transition distribution from the current state-action pair -""" -transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, -distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) - - -""" - observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) - -Returns the observation distribution for the s-a-s' tuple (state, action, and next state) -""" -observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) - -""" - observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) - -Modifies distribution to the observation distribution for the s-a-s' tuple (state, action, and next state) and returns it -""" -observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) - - -""" - isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) - -Checks if state s is terminal -""" -isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) - - -""" - isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) -Checks if an observation is terminal. -""" -isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) - - -""" - n_states{S,A,O}(pomdp::POMDP{S,A,O}) - -Returns the number of states in `pomdp`. Used for discrete models only. -""" -n_states{S,A,O}(pomdp::POMDP{S,A,O}) - -""" - n_actions{S,A,O}(pomdp::POMDP{S,A,O}) - -Returns the number of actions in `pomdp`. Used for discrete models only. -""" -n_actions{S,A,O}(pomdp::POMDP{S,A,O}) - -""" - n_observations{S,A,O}(pomdp::POMDP{S,A,O}) - -Returns the number of actions in `pomdp`. Used for discrete models only. -""" -n_observations{S,A,O}(pomdp::POMDP{S,A,O}) - -""" - state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S) - -Returns the integer index of state `s`. Used for discrete models only. -""" -state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S) - -""" - action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A) - -Returns the integer index of action `a`. Used for discrete models only. -""" -action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A) - -""" - obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) - -Returns the integer index of observation `o`. Used for discrete models only. 
-""" -obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) - -################################################################# -####################### Distributions ########################### -################################################################# - -""" -Abstract type for a probability distribution. - - T: type over which distribution is over (state, action, or observation) -""" -AbstractDistribution - -""" - rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) - -Fill sample with a random element from distribution d. The sample can be a state, action or observation. -""" -rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) - - -""" - rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, state::T) - -Fill sample with a random element from space d. The sample can be a state, action or observation. -""" -rand{T}(rng::AbstractRNG, d::AbstractSpace{T}, state::T) - -""" - pdf{T}(d::AbstractDistribution{T}, x::T) - -Value of probability distribution function at x -""" -pdf{T}(d::AbstractDistribution{T}, x::T) - - -""" - create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) - -Creates a transition distribution for model `pomdp`. This -could be a custom type, array, or any other sensible container. -The transition distirubtion is over states. -""" -create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) - -""" - create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) - -Creates an observation distribution for model `pomdp`. This -could be a custom type, array, or any other sensible container. -The observation distirubtion is over observations. -""" -create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) - - -################################################################# -##################### Solvers and Policies ###################### -################################################################# - -""" -Base type for an MDP/POMDP solver -""" -Solver - From 56ad3e315380c1fefca01e3465ab91c28c01d919 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 8 Apr 2016 16:40:16 -0700 Subject: [PATCH 22/30] added create_state, etc. 
back (see #69) --- src/default_constructors.jl | 3 +++ src/policy.jl | 9 +++++++-- src/pomdp.jl | 29 +++++++++++++++++++++-------- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/default_constructors.jl b/src/default_constructors.jl index beb3bcb0..f27e108b 100644 --- a/src/default_constructors.jl +++ b/src/default_constructors.jl @@ -1,5 +1,8 @@ # implements some default zero-argument constructors for bitstypes that do not have them (see issue #65) +# don't need these anymore since create_state was brought back +#= Base.Bool() = zero(Bool) Base.Int() = zero(Int) Base.Float64() = zero(Float64) +=# diff --git a/src/policy.jl b/src/policy.jl index 8dadf11a..543b8028 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -9,8 +9,13 @@ Base type for a policy (a map from every possible belief, or more abstract polic """ abstract Policy{S,A,O} -# creates an action object (for preallocation purposes) -#@pomdp_func create_action{S,A,O}(pomdp::POMDP{S,A,O}) +""" + create_action{S,A,O}(pomdp::POMDP{S,A,O}) + +Creates an action object (for preallocation purposes) +""" +@pomdp_func create_action{S,A,O}(pomdp::POMDP{S,A,O}) +create_action{S,A<:Number,O}(pomdp::POMDP{S,A,O}) = zero(A) """ action{S,A,O}(p::Policy{S,A,O}, s::S, a::A) diff --git a/src/pomdp.jl b/src/pomdp.jl index 9a70aa49..e35f25a9 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -60,18 +60,18 @@ Returns the transition distribution from the current state-action pair @pomdp_func transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) """ - observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) -Returns the observation distribution for the s-a-s' tuple (state, action, and next state) +Modifies distribution to the observation distribution for the a-s' tuple (action and next state) and returns it """ -@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) +@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) """ - observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) -Modifies distribution to the observation distribution for the a-s' tuple (action and next state) and returns it +Returns the observation distribution for the s-a-s' tuple (state, action, and next state) """ -@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) +@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}) # removed =create_observation_distribution(pomdp) to resolve ambiguity - problems should still implement this with an optional 5th argument """ reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) @@ -87,8 +87,21 @@ Returns the immediate reward for the s-a pair """ @pomdp_func reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, 
action::A) -#@pomdp_func create_state{S,A,O}(pomdp::POMDP{S,A,O}) -#@pomdp_func create_observation{S,A,O}(pomdp::POMDP{S,A,O}) +""" + create_state{S,A,O}(pomdp::POMDP{S,A,O}) + +Create a state object (for preallocation purposes). +""" +@pomdp_func create_state{S,A,O}(pomdp::POMDP{S,A,O}) +create_state{S<:Number,A,O}(pomdp::POMDP{S,A,O}) = zero(S) + +""" + create_observation{S,A,O}(pomdp::POMDP{S,A,O}) + +Create an observation object (for preallocation purposes). +""" +@pomdp_func create_observation{S,A,O}(pomdp::POMDP{S,A,O}) +create_observation{S,A,O<:Number}(pomdp::POMDP{S,A,O}) = zero(O) """ isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) From c6a36ded776d4f2454f6cf122ba79ae90cb5a343 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 8 Apr 2016 17:22:44 -0700 Subject: [PATCH 23/30] exported create_ functions --- src/POMDPs.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index b886f4bc..8ad4c41d 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -46,6 +46,9 @@ export upperbound, getindex, iterator, + create_state, + create_action, + create_observation, create_transition_distribution, create_observation_distribution, create_belief, From 330b152f27a88ebff44890faff0a6f7086d7d686 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 9 Apr 2016 17:36:20 -0700 Subject: [PATCH 24/30] removed @pomdp_func from a docstring --- src/belief.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/belief.jl b/src/belief.jl index e9833f1a..c9ebb22a 100644 --- a/src/belief.jl +++ b/src/belief.jl @@ -40,7 +40,7 @@ Creates a belief object of the type used by `updater` (preallocates memory) @pomdp_func create_belief{S,A,O}(updater::BeliefUpdater{S,A,O}) """ - @pomdp_func update{S,A,O}(updater::BeliefUpdater{S,A,O}, belief_old::Belief{S}, action::A, obs::O, + update{S,A,O}(updater::BeliefUpdater{S,A,O}, belief_old::Belief{S}, action::A, obs::O, belief_new::Belief{S}=create_belief(updater)) Returns a new instance of an updated belief given `belief_old` and the latest action and observation. 
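Illustration, not part of any patch in this series: the `BeliefUpdater` interface documented above, `create_belief(updater)` plus the five-argument `update`, is the contract that solver and toolbox packages are expected to extend. A minimal sketch of a conforming updater follows, written against the de-parameterized `Belief`/`BeliefUpdater` signatures that the later patches in this series settle on; the names `LastObsBelief` and `LastObsUpdater` are hypothetical and appear nowhere in these patches.

```julia
# Hypothetical example: a trivial updater whose "belief" is simply the most
# recent observation. It only illustrates the create_belief/update contract.
using POMDPs
import POMDPs: create_belief, update

type LastObsBelief <: Belief
    obs::Any            # latest observation, or `nothing` before the first update
end

type LastObsUpdater <: BeliefUpdater end

create_belief(u::LastObsUpdater) = LastObsBelief(nothing)

function update(u::LastObsUpdater, b_old::LastObsBelief, a, o,
                b_new::LastObsBelief=create_belief(u))
    b_new.obs = o       # keep only the newest observation
    return b_new
end
```

A policy could then be stepped through the `simulate` interface with such an updater, with `convert_belief` mapping the problem's native initial belief into a `LastObsBelief` where needed.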
From 3bed3fbae8681199028af04a47216105bbc6307c Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 9 Apr 2016 19:37:52 -0700 Subject: [PATCH 25/30] De-parameterized Policy and Simulator according to #68 (see also #70) --- src/POMDPs.jl | 6 ------ src/policy.jl | 43 ++++++++++++++++++++++++++----------------- src/simulator.jl | 6 +++--- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 8ad4c41d..2639e5e8 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -58,11 +58,6 @@ export Solver, solve, - #= - # Rewards - Reward, - =# - # Beliefs Belief, BeliefUpdater, @@ -96,4 +91,3 @@ include("policy.jl") include("simulator.jl") end - diff --git a/src/policy.jl b/src/policy.jl index 543b8028..0ae545da 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -7,7 +7,7 @@ """ Base type for a policy (a map from every possible belief, or more abstract policy state, to an optimal or suboptimal action) """ -abstract Policy{S,A,O} +abstract Policy """ create_action{S,A,O}(pomdp::POMDP{S,A,O}) @@ -18,52 +18,61 @@ Creates an action object (for preallocation purposes) create_action{S,A<:Number,O}(pomdp::POMDP{S,A,O}) = zero(A) """ - action{S,A,O}(p::Policy{S,A,O}, s::S, a::A) + action(p::Policy, state_or_belief, action) -Fills and returns action a for the current state, given the policy +Fills and returns action based on the current state or belief, given the policy """ -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S, a::A) +@pomdp_func action(policy::Policy, state_or_belief, action) """ - action(policy::Policy, s::State) + action(policy::Policy, state_or_belief) -Returns an action for the current state, given the policy +Returns an action for the current state or belief, given the policy """ -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, s::S) +@pomdp_func action(policy::Policy, state_or_belief) +# removed because of #70 +#= """ - action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}, a::A) + action{S,A}(p::Policy, b::Belief{S}, a::A) Fills and returns action a for the current belief, given the policy """ -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}, a::A) +@pomdp_func action{S,A}(p::Policy, b::Belief{S}, a::A) +=# +# removed because of #70 +#= """ - action{S,A,O}(policy::Policy{S,A,O}, belief::Belief{S}) + action{S}(policy::Policy, belief::Belief{S}) Returns an action for the current belief, given the policy """ -@pomdp_func action{S,A,O}(p::Policy{S,A,O}, b::Belief{S}) +@pomdp_func action(p::Policy, b::Belief) +=# """ - updater{S,A,O}(policy::Policy{S,A,O}) + updater(policy::Policy) Returns a default BeliefUpdater appropriate for a belief type that policy `p` can use """ -@pomdp_func updater{S,A,O}(policy::Policy{S,A,O}) +@pomdp_func updater(policy::Policy) +# removed because of #70 +#= """ - value{S,A,O}(p::Policy{S,A,O}, belief::Belief{S}) + value(p::Policy, belief::Belief) Returns the utility value from policy p given the belief """ -@pomdp_func value{S,A,O}(p::Policy{S,A,O}, belief::Belief{S}) +@pomdp_func value(p::Policy, belief::Belief) +=# """ - value{S,A,O}(p::Policy{S,A,O}, state::S) + value{S}(p::Policy, state_or_belief) Returns the utility value from policy p given the state """ -@pomdp_func value{S,A,O}(p::Policy{S,A,O}, state::S) +@pomdp_func value(p::Policy, state_or_belief) diff --git a/src/simulator.jl b/src/simulator.jl index a6802135..5e3dbda7 100644 --- a/src/simulator.jl +++ b/src/simulator.jl @@ -7,11 +7,11 @@ """ Base type for an object defining how a simulation should be carried out """ -abstract Simulator{S,A,O} +abstract Simulator 
""" - simulate{S,A,O}(simulator::Simulator{S,A,O}, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) + simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) Runs a simulation using the specified policy and returns the accumulated reward """ -@pomdp_func simulate{S,A,O}(simulator::Simulator{S,A,O}, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) +@pomdp_func simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) # Note: I got rid of the parameters for BeliefUpdater and Belief to resolve conflicting definition errors From dc812474a0ae0ea3d89b24ea278341fee88e2c0b Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Thu, 14 Apr 2016 19:42:05 -0700 Subject: [PATCH 26/30] got rid of default constructors --- src/POMDPs.jl | 1 - src/default_constructors.jl | 8 -------- 2 files changed, 9 deletions(-) delete mode 100644 src/default_constructors.jl diff --git a/src/POMDPs.jl b/src/POMDPs.jl index 2639e5e8..32f40632 100644 --- a/src/POMDPs.jl +++ b/src/POMDPs.jl @@ -81,7 +81,6 @@ export include("errors.jl") include("constants.jl") include("utils.jl") -include("default_constructors.jl") include("pomdp.jl") include("distribution.jl") include("belief.jl") diff --git a/src/default_constructors.jl b/src/default_constructors.jl deleted file mode 100644 index f27e108b..00000000 --- a/src/default_constructors.jl +++ /dev/null @@ -1,8 +0,0 @@ -# implements some default zero-argument constructors for bitstypes that do not have them (see issue #65) - -# don't need these anymore since create_state was brought back -#= -Base.Bool() = zero(Bool) -Base.Int() = zero(Int) -Base.Float64() = zero(Float64) -=# From c205d68da0d38c4a04349897366948f6999a5298 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Thu, 14 Apr 2016 20:37:32 -0700 Subject: [PATCH 27/30] deparameterized, closes #71 --- src/belief.jl | 26 ++++++++++++-------------- src/policy.jl | 32 ++------------------------------ src/pomdp.jl | 4 ++++ src/simulator.jl | 4 ++-- src/solver.jl | 10 +++++----- src/space.jl | 4 ++-- 6 files changed, 27 insertions(+), 53 deletions(-) diff --git a/src/belief.jl b/src/belief.jl index c9ebb22a..6d0c694a 100644 --- a/src/belief.jl +++ b/src/belief.jl @@ -8,22 +8,20 @@ """ Abstract type for an object representing some knowledge about the state (often a probability distribution) - - T: the type over which the belief is over (e.g. state) """ -abstract Belief{T} <: AbstractDistribution{T} +abstract Belief """ Abstract type for an object that defines how a belief should be updated """ -abstract BeliefUpdater{S,A,O} +abstract BeliefUpdater """ - initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S} = create_belief(pomdp)) + initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief = create_belief(pomdp)) Returns an initial belief for the pomdp. 
""" -@pomdp_func initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S} = create_belief(pomdp)) +@pomdp_func initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief = create_belief(pomdp)) """ create_belief(pomdp::POMDP) @@ -33,25 +31,25 @@ Creates a belief either to be used by updater or pomdp @pomdp_func create_belief{S,A,O}(pomdp::POMDP{S,A,O}) """ - create_belief{S,A,O}(updater::BeliefUpdater{S,A,O}) + create_belief(updater::BeliefUpdater) Creates a belief object of the type used by `updater` (preallocates memory) """ -@pomdp_func create_belief{S,A,O}(updater::BeliefUpdater{S,A,O}) +@pomdp_func create_belief(updater::BeliefUpdater) """ - update{S,A,O}(updater::BeliefUpdater{S,A,O}, belief_old::Belief{S}, action::A, obs::O, - belief_new::Belief{S}=create_belief(updater)) + update(updater::BeliefUpdater, belief_old::Belief, action, obs, + belief_new::Belief=create_belief(updater)) Returns a new instance of an updated belief given `belief_old` and the latest action and observation. """ -@pomdp_func update{S,A,O}(updater::BeliefUpdater{S,A,O}, belief_old::Belief{S}, action::A, obs::O, belief_new::Belief{S}=create_belief(updater)) +@pomdp_func update(updater::BeliefUpdater, belief_old::Belief, action::Any, obs::Any, belief_new::Belief=create_belief(updater)) # returns a belief that can be updated using `updater` that has a similar distribution to `b` (this conversion may be lossy) """ - convert_belief{S,A,O}(updater::BeliefUpdater{S,A,O}, belief::Belief{S}, - new_belief::Belief{S}=create_belief(updater)) = belief + convert_belief(updater::BeliefUpdater, belief::Belief, + new_belief::Belief=create_belief(updater)) = belief Returns a belief that can be updated using `updater` that has a similar distribution to `belief`. """ -@pomdp_func convert_belief{S,A,O}(updater::BeliefUpdater{S,A,O}, belief::Belief{S}, new_belief::Belief{S}=create_belief(updater)) = belief +@pomdp_func convert_belief(updater::BeliefUpdater, belief::Belief, new_belief::Belief=create_belief(updater)) = belief diff --git a/src/policy.jl b/src/policy.jl index 0ae545da..c6d4aa4c 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -15,6 +15,8 @@ abstract Policy Creates an action object (for preallocation purposes) """ @pomdp_func create_action{S,A,O}(pomdp::POMDP{S,A,O}) + +# default implementation for numeric types create_action{S,A<:Number,O}(pomdp::POMDP{S,A,O}) = zero(A) """ @@ -32,26 +34,6 @@ Returns an action for the current state or belief, given the policy """ @pomdp_func action(policy::Policy, state_or_belief) -# removed because of #70 -#= -""" - action{S,A}(p::Policy, b::Belief{S}, a::A) - -Fills and returns action a for the current belief, given the policy -""" -@pomdp_func action{S,A}(p::Policy, b::Belief{S}, a::A) -=# - -# removed because of #70 -#= -""" - action{S}(policy::Policy, belief::Belief{S}) - -Returns an action for the current belief, given the policy - -""" -@pomdp_func action(p::Policy, b::Belief) -=# """ updater(policy::Policy) @@ -60,16 +42,6 @@ Returns a default BeliefUpdater appropriate for a belief type that policy `p` ca """ @pomdp_func updater(policy::Policy) -# removed because of #70 -#= -""" - value(p::Policy, belief::Belief) - -Returns the utility value from policy p given the belief -""" -@pomdp_func value(p::Policy, belief::Belief) -=# - """ value{S}(p::Policy, state_or_belief) diff --git a/src/pomdp.jl b/src/pomdp.jl index e35f25a9..690cc1a7 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -93,6 +93,8 @@ Returns the immediate reward for the s-a pair Create a state object (for 
preallocation purposes). """ @pomdp_func create_state{S,A,O}(pomdp::POMDP{S,A,O}) + +# default implementation for numeric types create_state{S<:Number,A,O}(pomdp::POMDP{S,A,O}) = zero(S) """ @@ -101,6 +103,8 @@ create_state{S<:Number,A,O}(pomdp::POMDP{S,A,O}) = zero(S) Create an observation object (for preallocation purposes). """ @pomdp_func create_observation{S,A,O}(pomdp::POMDP{S,A,O}) + +# default implementation for numeric types create_observation{S,A,O<:Number}(pomdp::POMDP{S,A,O}) = zero(O) """ diff --git a/src/simulator.jl b/src/simulator.jl index 5e3dbda7..52329dba 100644 --- a/src/simulator.jl +++ b/src/simulator.jl @@ -10,8 +10,8 @@ Base type for an object defining how a simulation should be carried out abstract Simulator """ - simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy{S,A,O}, updater::BeliefUpdater{S,A,O}, initial_belief::Belief{S}) + simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) Runs a simulation using the specified policy and returns the accumulated reward """ -@pomdp_func simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) # Note: I got rid of the parameters for BeliefUpdater and Belief to resolve conflicting definition errors +@pomdp_func simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) diff --git a/src/solver.jl b/src/solver.jl index 6157f967..09d8cec1 100644 --- a/src/solver.jl +++ b/src/solver.jl @@ -2,18 +2,18 @@ """ Base type for an MDP/POMDP solver """ -abstract Solver{S,A,O} +abstract Solver """ - create_policy{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}) + create_policy{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}) Creates a policy object (for preallocation purposes) """ -create_policy{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}) = error("$(typeof(pomdp)) does not implement create_policy") +@pomdp_func create_policy{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}) """ - solve{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) + solve{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) Solves the POMDP using method associated with solver, and returns a policy. """ -solve{S,A,O}(solver::Solver{S,A,O}, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) = error("$(typeof(solver)) does not implement solve for model $(typeof(pomdp))") +@pomdp_func solve{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) diff --git a/src/space.jl b/src/space.jl index 834c0c61..1a05e3b1 100644 --- a/src/space.jl +++ b/src/space.jl @@ -63,11 +63,11 @@ Modifies aspace to the action space accessible from the given state and returns @pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, acts::AbstractSpace{A}=actions(pomdp)) """ - actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, aspace::AbstractSpace{A}) + actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief, aspace::AbstractSpace{A}) Modifies aspace to the action space accessible from the states with nonzero belief and returns it. 
""" -@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief{S}, acts::AbstractSpace{A}=actions(pomdp)) +@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief, acts::AbstractSpace{A}=actions(pomdp)) """ observations{S,A,O}(pomdp::POMDP{S,A,O}) From e723d103102521db704410f8ed51ce2da2e7e140 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Thu, 14 Apr 2016 22:07:58 -0700 Subject: [PATCH 28/30] split POMDPs and MDPs. fixes #60 and fixes #70 --- src/belief.jl | 6 +-- src/distribution.jl | 12 +++--- src/policy.jl | 27 ++++++++----- src/pomdp.jl | 96 ++++++++++++++++++++++++--------------------- src/simulator.jl | 5 ++- src/solver.jl | 9 +++-- src/space.jl | 32 ++++++++------- 7 files changed, 105 insertions(+), 82 deletions(-) diff --git a/src/belief.jl b/src/belief.jl index 6d0c694a..a72cffe8 100644 --- a/src/belief.jl +++ b/src/belief.jl @@ -17,18 +17,18 @@ Abstract type for an object that defines how a belief should be updated abstract BeliefUpdater """ - initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief = create_belief(pomdp)) + initial_belief(pomdp::POMDP, belief::Belief = create_belief(pomdp)) Returns an initial belief for the pomdp. """ -@pomdp_func initial_belief{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief = create_belief(pomdp)) +@pomdp_func initial_belief(pomdp::POMDP, belief::Belief = create_belief(pomdp)) """ create_belief(pomdp::POMDP) Creates a belief either to be used by updater or pomdp """ -@pomdp_func create_belief{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func create_belief(pomdp::POMDP) """ create_belief(updater::BeliefUpdater) diff --git a/src/distribution.jl b/src/distribution.jl index 35279fc4..244234eb 100644 --- a/src/distribution.jl +++ b/src/distribution.jl @@ -5,18 +5,20 @@ ################################################################# """ - create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) + create_transition_distribution(problem::POMDP) + create_transition_distribution(problem::MDP) Returns a transition distribution (for memory preallocation). """ -@pomdp_func create_transition_distribution{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func create_transition_distribution(problem::Union{POMDP,MDP}) """ - create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) + create_observation_distribution(problem::POMDP) + create_observation_distribution(problem::MDP) Returns an observation distribution (for memory preallocation). """ -@pomdp_func create_observation_distribution{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func create_observation_distribution(problem::Union{POMDP,MDP}) """ rand{T}(rng::AbstractRNG, d::AbstractDistribution{T}, sample::T) @@ -39,4 +41,4 @@ Value of probability distribution `d` function at sample `x`. 
abstract DiscreteDistribution{T} <: AbstractDistribution{T} @pomdp_func Base.length{T}(d::DiscreteDistribution{T}) @pomdp_func weight{T}(d::DiscreteDistribution{T}, i::Int) -@pomdp_func index{S,A,O,T}(pomdp::POMDP{S,A,O}, d::DiscreteDistribution{T}, i::Int) +@pomdp_func index{T}(problem::Union{POMDP,MDP}, d::DiscreteDistribution{T}, i::Int) diff --git a/src/policy.jl b/src/policy.jl index c6d4aa4c..bddf5870 100644 --- a/src/policy.jl +++ b/src/policy.jl @@ -10,29 +10,35 @@ Base type for a policy (a map from every possible belief, or more abstract polic abstract Policy """ - create_action{S,A,O}(pomdp::POMDP{S,A,O}) + create_action(problem::POMDP) + create_action(problem::MDP) Creates an action object (for preallocation purposes) """ -@pomdp_func create_action{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func create_action(problem::Union{POMDP,MDP}) # default implementation for numeric types -create_action{S,A<:Number,O}(pomdp::POMDP{S,A,O}) = zero(A) +create_action{S,A<:Number}(problem::Union{POMDP{S,A},MDP{S,A}}) = zero(A) """ - action(p::Policy, state_or_belief, action) + action(p::Policy, x::Any, action) + action(p::Policy, x::Belief, action) -Fills and returns action based on the current state or belief, given the policy +Fills and returns action based on the current state or belief, given the policy. + +If an MDP is being simulated, x will be a state; if a POMDP is being simulated, x will be a Belief """ -@pomdp_func action(policy::Policy, state_or_belief, action) +@pomdp_func action(policy::Policy, x::Any, action::Any) """ - action(policy::Policy, state_or_belief) + action(policy::Policy, x::Any) + action(policy::Policy, x::Belief) Returns an action for the current state or belief, given the policy +If an MDP is being simulated, x will be a state; if a POMDP is being simulated, x will be a Belief """ -@pomdp_func action(policy::Policy, state_or_belief) +@pomdp_func action(policy::Policy, x::Any) """ @@ -43,8 +49,9 @@ Returns a default BeliefUpdater appropriate for a belief type that policy `p` ca @pomdp_func updater(policy::Policy) """ - value{S}(p::Policy, state_or_belief) + value{S}(p::Policy, x::Any) + value{S}(p::Policy, x::Belief) Returns the utility value from policy p given the state """ -@pomdp_func value(p::Policy, state_or_belief) +@pomdp_func value(p::Policy, x::Any) diff --git a/src/pomdp.jl b/src/pomdp.jl index 690cc1a7..54d9cc19 100644 --- a/src/pomdp.jl +++ b/src/pomdp.jl @@ -14,7 +14,7 @@ Abstract base type for a fully observable Markov decision process. S: state type A: action type """ -abstract MDP{S,A} <: POMDP{S,A,S} +abstract MDP{S,A} """ Abstract type for a probability distribution. @@ -24,123 +24,131 @@ Abstract type for a probability distribution. abstract AbstractDistribution{T} """ - n_states{S,A,O}(pomdp::POMDP{S,A,O}) + n_states(problem::POMDP) + n_states(problem::MDP) -Returns the number of states in `pomdp`. Used for discrete models only. +Returns the number of states in `problem`. Used for discrete models only. """ -@pomdp_func n_states{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func n_states(problem::Union{POMDP,MDP}) """ - n_actions{S,A,O}(pomdp::POMDP{S,A,O}) + n_actions(problem::POMDP) + n_actions(problem::MDP) -Returns the number of actions in `pomdp`. Used for discrete models only. +Returns the number of actions in `problem`. Used for discrete models only. 
""" -@pomdp_func n_actions{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func n_actions(problem::Union{POMDP,MDP}) """ - n_observations{S,A,O}(pomdp::POMDP{S,A,O}) + n_observations(problem::POMDP) -Returns the number of actions in `pomdp`. Used for discrete models only. +Returns the number of actions in `problem`. Used for discrete models only. """ -@pomdp_func n_observations{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func n_observations(problem::POMDP) """ - discount{S,A,O}(pomdp::POMDP{S,A,O}) + discount(problem::POMDP) + discount(problem::MDP) Return the discount factor for the problem. """ -@pomdp_func discount(pomdp::POMDP) +@pomdp_func discount(problem::Union{POMDP,MDP}) """ - transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, -distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) + transition{S,A,O}(problem::POMDP{S,A,O}, state::S, action::A, +distribution::AbstractDistribution{S}=create_transition_distribution(problem)) + transition{S,A}(problem::MDP{S,A}, state::S, action::A, +distribution::AbstractDistribution{S}=create_transition_distribution(problem)) Returns the transition distribution from the current state-action pair """ -@pomdp_func transition{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, distribution::AbstractDistribution{S}=create_transition_distribution(pomdp)) +@pomdp_func transition{S,A}(problem::Union{POMDP{S,A},MDP{S,A}}, state::S, action::A, distribution::AbstractDistribution{S}=create_transition_distribution(problem)) """ - observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + observation{S,A,O}(problem::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(problem)) Modifies distribution to the observation distribution for the a-s' tuple (action and next state) and returns it """ -@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) +@pomdp_func observation{S,A,O}(problem::POMDP{S,A,O}, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(problem)) """ - observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(pomdp)) + observation{S,A,O}(problem::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}=create_observation_distribution(problem)) Returns the observation distribution for the s-a-s' tuple (state, action, and next state) """ -@pomdp_func observation{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}) # removed =create_observation_distribution(pomdp) to resolve ambiguity - problems should still implement this with an optional 5th argument +@pomdp_func observation{S,A,O}(problem::POMDP{S,A,O}, state::S, action::A, statep::S, distribution::AbstractDistribution{O}) # removed =create_observation_distribution(problem) to resolve ambiguity - problems should still implement this with an optional 5th argument """ - reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) + reward{S,A,O}(problem::POMDP{S,A,O}, state::S, action::A, statep::S) + reward{S,A}(problem::MDP{S,A}, state::S, action::A, statep::S) Returns the immediate reward for the s-a-s' triple """ -@pomdp_func reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A, statep::S) +@pomdp_func reward{S,A}(problem::Union{POMDP{S,A},MDP{S,A}}, 
state::S, action::A, statep::S)
 
 """
-    reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A)
+    reward{S,A,O}(problem::POMDP{S,A,O}, state::S, action::A)
+    reward{S,A}(problem::MDP{S,A}, state::S, action::A)
 
 Returns the immediate reward for the s-a pair
 """
-@pomdp_func reward{S,A,O}(pomdp::POMDP{S,A,O}, state::S, action::A)
+@pomdp_func reward{S,A}(problem::Union{POMDP{S,A},MDP{S,A}}, state::S, action::A)
 
 """
-    create_state{S,A,O}(pomdp::POMDP{S,A,O})
+    create_state(problem::POMDP)
+    create_state(problem::MDP)
 
 Create a state object (for preallocation purposes).
 """
-@pomdp_func create_state{S,A,O}(pomdp::POMDP{S,A,O})
+@pomdp_func create_state(problem::Union{POMDP,MDP})
 
 # default implementation for numeric types
-create_state{S<:Number,A,O}(pomdp::POMDP{S,A,O}) = zero(S)
+create_state{S<:Number,A}(problem::Union{POMDP{S,A},MDP{S,A}}) = zero(S)
 
 """
-    create_observation{S,A,O}(pomdp::POMDP{S,A,O})
+    create_observation(problem::POMDP)
 
 Create an observation object (for preallocation purposes).
 """
-@pomdp_func create_observation{S,A,O}(pomdp::POMDP{S,A,O})
+@pomdp_func create_observation(problem::POMDP)
 
 # default implementation for numeric types
-create_observation{S,A,O<:Number}(pomdp::POMDP{S,A,O}) = zero(O)
+create_observation{S,A,O<:Number}(problem::POMDP{S,A,O}) = zero(O)
 
 """
-    isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S)
+    isterminal_obs{S,A,O}(problem::POMDP{S,A,O}, observation::O)
 
-Checks if state s is terminal
+Checks if an observation is terminal.
 """
-@pomdp_func isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O) = false
+@pomdp_func isterminal_obs{S,A,O}(problem::POMDP{S,A,O}, observation::O) = false
 
 """
-    isterminal_obs{S,A,O}(pomdp::POMDP{S,A,O}, observation::O)
+    isterminal{S,A,O}(problem::POMDP{S,A,O}, state::S)
+    isterminal{S,A}(problem::MDP{S,A}, state::S)
 
-Checks if an observation is terminal.
+Checks if state `s` is terminal.
 """
-@pomdp_func isterminal{S,A,O}(pomdp::POMDP{S,A,O}, state::S) = false
-
-# @pomdp_func isterminal(pomdp::POMDP, observation::Any) = false
-# @pomdp_func isterminal_obs(pomdp::POMDP, state::Any) = false
+@pomdp_func isterminal{S,A}(problem::Union{POMDP{S,A},MDP{S,A}}, state::S) = false
 
 """
-    state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S)
+    state_index{S,A,O}(problem::POMDP{S,A,O}, s::S)
+    state_index{S,A}(problem::MDP{S,A}, s::S)
 
 Returns the integer index of state `s`. Used for discrete models only.
 """
-@pomdp_func state_index{S,A,O}(pomdp::POMDP{S,A,O}, s::S)
+@pomdp_func state_index{S,A}(problem::Union{POMDP{S,A},MDP{S,A}}, s::S)
 
 """
-    action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A)
+    action_index{S,A,O}(problem::POMDP{S,A,O}, a::A)
+    action_index{S,A}(problem::MDP{S,A}, a::A)
 
 Returns the integer index of action `a`. Used for discrete models only.
 """
-@pomdp_func action_index{S,A,O}(pomdp::POMDP{S,A,O}, a::A)
+@pomdp_func action_index{S,A}(problem::Union{POMDP{S,A},MDP{S,A}}, a::A)
 
 """
-    obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O)
+    obs_index{S,A,O}(problem::POMDP{S,A,O}, o::O)
 
 Returns the integer index of observation `o`. Used for discrete models only.
""" -@pomdp_func obs_index{S,A,O}(pomdp::POMDP{S,A,O}, o::O) +@pomdp_func obs_index{S,A,O}(problem::POMDP{S,A,O}, o::O) diff --git a/src/simulator.jl b/src/simulator.jl index 52329dba..b252ec6b 100644 --- a/src/simulator.jl +++ b/src/simulator.jl @@ -10,8 +10,9 @@ Base type for an object defining how a simulation should be carried out abstract Simulator """ - simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) + simulate{S,A,O}(simulator::Simulator, problem::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) + simulate{S,A}(simulator::Simulator, problem::MDP{S,A}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) Runs a simulation using the specified policy and returns the accumulated reward """ -@pomdp_func simulate{S,A,O}(simulator::Simulator, pomdp::POMDP{S,A,O}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) +@pomdp_func simulate(simulator::Simulator, problem::Union{POMDP,MDP}, policy::Policy, updater::BeliefUpdater, initial_belief::Belief) diff --git a/src/solver.jl b/src/solver.jl index 09d8cec1..393bc3f1 100644 --- a/src/solver.jl +++ b/src/solver.jl @@ -5,15 +5,16 @@ Base type for an MDP/POMDP solver abstract Solver """ - create_policy{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}) + create_policy(solver::Solver, problem::POMDP) + create_policy(solver::Solver, problem::MDP) Creates a policy object (for preallocation purposes) """ -@pomdp_func create_policy{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}) +@pomdp_func create_policy(solver::Solver, problem::Union{POMDP,MDP}) """ - solve{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) + solve(solver::Solver, problem::POMDP, policy=create_policy(solver, problem)) Solves the POMDP using method associated with solver, and returns a policy. """ -@pomdp_func solve{S,A,O}(solver::Solver, pomdp::POMDP{S,A,O}, policy=create_policy(solver, pomdp)) +@pomdp_func solve(solver::Solver, problem::Union{POMDP,MDP}, policy=create_policy(solver, problem)) diff --git a/src/space.jl b/src/space.jl index 1a05e3b1..c4269c1f 100644 --- a/src/space.jl +++ b/src/space.jl @@ -35,50 +35,54 @@ Returns an iterable type (array or custom iterator) corresponding to space `s`. @pomdp_func upperbound{T}(s::AbstractSpace{T}, i::Int) """ - states{S,A,O}(pomdp::POMDP{S,A,O}) + states(problem::POMDP) + states(problem::MDP) Returns the complete state space of a POMDP. """ -@pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func states(problem::Union{POMDP,MDP}) """ - states{S,A,O}(pomdp::POMDP{S,A,O}, state::S) + states{S,A,O}(problem::POMDP{S,A,O}, state::S) + states{S,A}(problem::MDP{S,A}, state::S) Returns a subset of the state space reachable from `state`. """ -@pomdp_func states{S,A,O}(pomdp::POMDP{S,A,O}, state::S, sts::AbstractSpace{S}=states(pomdp)) +@pomdp_func states{S,A}(problem::Union{POMDP{S,A},MDP{S,A}}, state::S, sts::AbstractSpace{S}=states(problem)) """ - actions{S,A,O}(pomdp::POMDP{S,A,O}) + actions(problem::POMDP) + actions(problem::MDP) Returns the entire action space of a POMDP. """ -@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func actions(problem::Union{POMDP,MDP}) """ - actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, aspace::AbstractSpace{A}) + actions{S,A,O}(problem::POMDP{S,A,O}, state::S, aspace::AbstractSpace{A}) + actions{S,A}(problem::MDP{S,A}, state::S, aspace::AbstractSpace{A}) Modifies aspace to the action space accessible from the given state and returns it. 
""" -@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, state::S, acts::AbstractSpace{A}=actions(pomdp)) +@pomdp_func actions{S,A}(problem::Union{MDP{S,A},POMDP{S,A}}, state::S, acts::AbstractSpace{A}=actions(problem)) """ - actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief, aspace::AbstractSpace{A}) + actions{S,A,O}(problem::POMDP{S,A,O}, belief::Belief, aspace::AbstractSpace{A}) Modifies aspace to the action space accessible from the states with nonzero belief and returns it. """ -@pomdp_func actions{S,A,O}(pomdp::POMDP{S,A,O}, belief::Belief, acts::AbstractSpace{A}=actions(pomdp)) +@pomdp_func actions{S,A,O}(problem::POMDP{S,A,O}, belief::Belief, acts::AbstractSpace{A}=actions(problem)) """ - observations{S,A,O}(pomdp::POMDP{S,A,O}) + observations(problem::POMDP) Returns the entire observation space. """ -@pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}) +@pomdp_func observations(problem::POMDP) """ - observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) + observations{S,A,O}(problem::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(problem)) Modifies ospace to the observation space accessible from the given state and returns it. """ -@pomdp_func observations{S,A,O}(pomdp::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(pomdp)) +@pomdp_func observations{S,A,O}(problem::POMDP{S,A,O}, state::S, obs::AbstractSpace{O}=observations(problem)) From 0cc44da7b0f8a5e06f8023ef5067a806e508ce12 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 15 Apr 2016 10:20:58 -0700 Subject: [PATCH 29/30] fixed build instructions --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index acc64843..9809acf8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,4 +12,4 @@ script: - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.test("POMDPs")' after_success: - julia -e 'Pkg.clone("https://github.com/MichaelHatherly/Documenter.jl")' - - julia -e 'cd(Pkg.dir("PACKAGE_NAME")); include(joinpath("docs", "make.jl"))' + - julia -e 'cd(Pkg.dir("POMDPs")); include(joinpath("docs", "make.jl"))' From 548f2ed09806bd902fd0517c29f58de86561ee6a Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 15 Apr 2016 10:27:42 -0700 Subject: [PATCH 30/30] added note about interface change to the readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 04c9f4d3..51a51f0e 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ This package provides a basic interface for working with partially observable Markov decision processes (POMDPs). +NEWS: We recently made a significant change to the interface, introducing parametric types (see issue #56). If you wish to continue using the old interface, the v0.1 release may be used, but we recommend that all projects update to the new version. + The goal is to provide a common programming vocabulary for researchers and students to use primarily for three tasks: 1. Expressing problems using the POMDP format.