diff --git a/src/markov/ddp.jl b/src/markov/ddp.jl
index f333fabd..420c39a6 100644
--- a/src/markov/ddp.jl
+++ b/src/markov/ddp.jl
@@ -22,7 +22,7 @@ Notes
 =#
 
-import Base.*
+import Base: *, ctranspose
 
 #------------------------#
 #-Types and Constructors-#
@@ -66,27 +66,34 @@ type DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind}
             msg = "R must be 2-dimensional without s-a formulation"
             throw(ArgumentError(msg))
         end
-        (beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)"))
 
-        # verify input integrity 2
-        num_states, num_actions = size(R)
-        if size(Q) != (num_states, num_actions, num_states)
-            throw(ArgumentError("shapes of R and Q must be (n,m) and (n,m,n)"))
-        end
+        a, s = size(R)
 
-        # check feasibility
+        # check feasibility of beta
+        (0 <= beta < 1) || throw(ArgumentError("beta must be in [0, 1)"))
+
+        # verify dimensions of Q
+        size(Q) == (s, s, a) || throw(ArgumentError("shapes of R and Q must be (a, s) and (s, s, a)"))
+
+        # check feasibility of R
         R_max = s_wise_max(R)
-        if any(R_max .== -Inf)
+        if any(isinf(R_max))
             # First state index such that all actions yield -Inf
-            s = find(R_max .== -Inf) #-Only Gives True
+            s_violated = find(isinf(R_max))
             throw(ArgumentError("for every state the reward must be finite for
-                some action: violated for state $s"))
+                some action: violated for state(s) $(s_violated)"))
         end
 
+        all(Q .>= 0) || throw(ArgumentError("Q must have nonnegative elements"))
+
+        # check columns of Q sum to 1
+        cols = sum(Q, 1)
+        isapprox(cols, ones(cols)) || throw(ArgumentError("columns of Q must sum to 1"))
+
         # here the indices and indptr are null.
         s_indices = Nullable{Vector{Int}}()
         a_indices = Nullable{Vector{Int}}()
-        a_indptr = Nullable{Vector{Int}}()
+        a_indptr  = Nullable{Vector{Int}}()
 
         new(R, Q, beta, s_indices, a_indices, a_indptr)
     end
@@ -376,8 +383,10 @@ for a given value function v.
 
 - `Tv::Vector` : Updated value function vector
 
 """
-bellman_operator(ddp::DiscreteDP, v::Vector) =
-    s_wise_max(ddp, ddp.R + ddp.beta * ddp.Q * v)
+function bellman_operator(ddp::DiscreteDP, v::Vector)
+    vals = ddp.R + ddp.beta * ddp.Q * v
+    return s_wise_max(ddp, vals)
+end
 
 # ---------------------- #
 # Compute greedy methods #
@@ -514,8 +523,7 @@ Returns the controlled Markov chain for a given policy `sigma`.
 
 mc : MarkovChain
     Controlled Markov chain.
 """
-QuantEcon.MarkovChain(ddp::DiscreteDP, ddpr::DPSolveResult) =
-    MarkovChain(RQ_sigma(ddp, ddpr)[2])
+QuantEcon.MarkovChain(ddp::DiscreteDP, ddpr::DPSolveResult) = MarkovChain(RQ_sigma(ddp, ddpr)[2])
 
 """
 Method of `RQ_sigma` that extracts sigma from a `DPSolveResult`
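Aside (not part of the patch): a minimal sketch of the array layout the reworked constructor appears to assume, using the two-state, two-action example from the test file below. Under that reading, R is (num_actions, num_states), Q is (num_states, num_states, num_actions), and each column of Q is a distribution over next states; the `cols` check at the end mirrors the validation added above.

using QuantEcon

# rewards R[a, s]: 2 actions (rows) x 2 states (columns); action 2 is not
# available in state 2, which is encoded as -Inf
R = [5.0  -1.0;
     10.0 -Inf]

# transitions Q[s', s, a]: every column is a distribution over next states
Q = Array{Float64}(2, 2, 2)
Q[:, :, 1] = [0.5 0.0;
              0.5 1.0]    # action 1
Q[:, :, 2] = [0.0 0.0;
              1.0 1.0]    # action 2

cols = sum(Q, 1)
@assert isapprox(cols, ones(cols))   # the "columns of Q sum to 1" check

ddp = DiscreteDP(R, Q, 0.95)         # passes the constructor checks above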
@@ -541,15 +549,28 @@ the transition probability matrix `Q_sigma`.
 
     of shape (n, n).
 
 """
-function RQ_sigma{T<:Integer}(ddp::DDP, sigma::Array{T})
-    R_sigma = [ddp.R[i, sigma[i]] for i in 1:length(sigma)]
-    Q_sigma = hcat([getindex(ddp.Q, i, sigma[i], Colon())[:] for i=1:num_states(ddp)]...)
-    return R_sigma, Q_sigma'
+function RQ_sigma{T,Tbeta,Tind,U<:Integer}(ddp::DDP{T,Tbeta,Tind}, sigma::Vector{U})
+    s, s_, a = size(ddp.Q)
+    msg = "length of policy vector ($(length(sigma))) does not match number of states in model ($s)"
+    length(sigma) == s || throw(ArgumentError(msg))
+
+    R_sigma = Vector{T}(s)
+    Q_sigma = Matrix{T}(s, s)
+
+    for (i, a_i) in enumerate(sigma)
+        R_sigma[i] = ddp.R[a_i, i]
+        Q_sigma[i, :] = ddp.Q[:, i, a_i]
+    end
+
+    return R_sigma, Q_sigma
 end
 
 # TODO: express it in a similar way as above to exploit Julia's column major order
-function RQ_sigma{T<:Integer}(ddp::DDPsa, sigma::Array{T})
-    sigma_indices = Array(T, num_states(ddp))
+function RQ_sigma{U<:Integer}(ddp::DDPsa, sigma::Vector{U})
+    msg = "length of policy vector ($(length(sigma))) does not match number of states in model ($(num_states(ddp)))"
+    length(sigma) == num_states(ddp) || throw(ArgumentError(msg))
+
+    sigma_indices = Vector{U}(num_states(ddp))
     _find_indices!(get(ddp.a_indices), get(ddp.a_indptr), sigma, sigma_indices)
     R_sigma = ddp.R[sigma_indices]
     Q_sigma = ddp.Q[sigma_indices, :]
@@ -581,25 +602,26 @@ s_wise_max!(vals::AbstractMatrix, out::Vector) = (println("calling this one! ");
 
 """
 Populate `out` with  `max_a vals(s, a)`,  where `vals` is represented as a
-`AbstractMatrix` of size `(num_states, num_actions)`.
+`AbstractMatrix` of size `(num_actions, num_states)`.
 
-Also fills `out_argmax` with the column number associated with the indmax in each row
+Also fills `out_argmax` with the row index of the maximizing action for each
+column (state)
 """
 function s_wise_max!(vals::AbstractMatrix, out::Vector, out_argmax::Vector)
-    # naive implementation where I just iterate over the rows
+    length(out) == length(out_argmax) == size(vals, 2) || throw(DimensionMismatch())
+
     nr, nc = size(vals)
-    for i_r in 1:nr
+    for i_c in 1:nc  # index over columns (states)
         # reset temporaries
         cur_max = -Inf
-        out_argmax[i_r] = 1
+        out_argmax[i_c] = 1
 
-        for i_c in 1:nc
+        for i_r in 1:nr  # index over rows (actions)
             @inbounds v_rc = vals[i_r, i_c]
             if v_rc > cur_max
-                out[i_r] = v_rc
-                out_argmax[i_r] = i_c
-                cur_max = v_rc
+                out[i_c] = v_rc
+                out_argmax[i_c] = i_r
+                cur_max = v_rc
             end
         end
 
@@ -625,7 +647,7 @@ Populate `out` with  `max_a vals(s, a)`,  where `vals` is represented as a
 function s_wise_max!(a_indices::Vector, a_indptr::Vector, vals::Vector, out::Vector)
     n = length(out)
-    for i in 1:n
+    @inbounds for i in 1:n
         if a_indptr[i] != a_indptr[i+1]
             m = a_indptr[i]
             for j in a_indptr[i]+1:(a_indptr[i+1]-1)
@@ -734,14 +756,29 @@ Define Matrix Multiplication between 3-dimensional matrix and a vector
 
-Matrix multiplication over the last dimension of A
+Matrix multiplication over the first (next-state) dimension of `Q`
 
 """
-function *{T}(A::Array{T,3}, v::Vector)
-    shape = size(A)
-    size(v, 1) == shape[end] || error("wrong dimensions")
+# 3-d "transpose": permutes an (s, a, s') array into the (s', s, a) layout
+function ctranspose{T}(A::Array{T,3})
+    m, n, o = size(A)
+    B = Array{T}(o, m, n)
+    @inbounds for i in 1:m, j in 1:n, k in 1:o
+        # B's first dimension indexes what was A's last dimension
+        B[k, i, j] = A[i, j, k]
+    end
+    return B
+end
+
+function *{T,U}(Q::Array{T,3}, v::Vector{U})
+    s, s_, a = size(Q)
+    msg = "the first two dimensions of Q must have equal length"
+    s == s_ || throw(ArgumentError(msg))
+    s == length(v) || throw(DimensionMismatch())
 
-    B = reshape(A, (prod(shape[1:end-1]), shape[end]))
-    out = B * v
+    R = Array{promote_type(T, U)}(a, s)
+
+    @inbounds for i in 1:s, j in 1:a
+        # R[a, s] = sum over s' of v[s'] * Q[s', s, a]
+        R[j, i] = dot(v, Q[:, i, j])
+    end
 
-    return reshape(out, shape[1:end-1])
+    return R
 end
 
 """
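Aside (not part of the patch): a short sketch of what the two new methods compute, under my reading of the loops above. The 3-d `ctranspose` appears to coincide with `permutedims(A, (3, 1, 2))`, turning an (s, a, s') array into the (s', s, a) layout, and `Q * v` stacks the dot products `dot(v, Q[:, s, a])` into an (actions x states) matrix; the variable names below are illustrative.

# assuming the 3-d ctranspose and * methods from the patch above are loaded
ns, na = 3, 2
Q_sas = rand(ns, na, ns)                  # old layout: Q[s, a, s']

Q_new = Q_sas'                            # new layout: Q[s', s, a]
@assert Q_new == permutedims(Q_sas, (3, 1, 2))

v = rand(ns)
EV = Q_new * v                            # (num_actions, num_states) matrix
for s in 1:ns, a in 1:na
    # EV[a, s] is the expected value of v at the next state, given (s, a)
    @assert isapprox(EV[a, s], dot(v, Q_new[:, s, a]))
end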
diff --git a/test/test_ddp.jl b/test/test_ddp.jl
index f5521513..f949aa06 100644
--- a/test/test_ddp.jl
+++ b/test/test_ddp.jl
@@ -10,7 +10,6 @@ Tests for markov/ddp.jl
 =#
 
 @testset "Testing markov/dpp.jl" begin
-
     #-Setup-#
 
     # Example from Puterman 2005, Section 3.1
@@ -18,17 +17,20 @@ Tests for markov/ddp.jl
 
     # Formulation with Dense Matrices R: n x m, Q: n x m x n
     n, m = 2, 2  # number of states, number of actions
+
     R = [5.0 10.0; -1.0 -Inf]
-    Q = Array(Float64, n, m, n)
+
+    Q = Array{Float64}(n, m, n)
     Q[:, :, 1] = [0.5 0.0; 0.0 0.0]
     Q[:, :, 2] = [0.5 1.0; 1.0 1.0]
 
-    ddp0 = DiscreteDP(R, Q, beta)
+    ddp0 = DiscreteDP(R', Q', beta)
 
     # Formulation with state-action pairs
     L = 3  # Number of state-action pairs
     s_indices = [1, 1, 2]
     a_indices = [1, 2, 1]
+
     R_sa = [R[1, 1], R[1, 2], R[2, 1]]
     Q_sa = spzeros(L, n)
     Q_sa[1, :] = Q[1, 1, :]
@@ -47,78 +49,66 @@ Tests for markov/ddp.jl
     v_star = [(5-5.5*beta)/((1-0.5*beta)*(1-beta)), -1/(1-beta)]
     sigma_star = [1, 1]
 
-    @testset "bellman_operator methods" begin
+    ddp_rational = DiscreteDP(convert(Array{Rational{BigInt}}, R'),
+                              convert(Array{Rational{BigInt}}, Q'),
+                              convert(Rational{BigInt}, beta))
+
+    @testset "bellman_operator methods" for ddp in ddp0_collection
         # Check both Dense and State-Action Pair Formulation
-        for ddp in ddp0_collection
-            @test isapprox(bellman_operator(ddp, v_star), v_star)
-        end
+        @test isapprox(bellman_operator(ddp, v_star), v_star)
     end
 
-    @testset "RQ_sigma" begin
-        nr, nc = size(R)
-        # test for DDP
-        sigmas = ([1, 1], [1, 2], [2, 1], [2, 2])
-        for sig in sigmas
-            r, q = RQ_sigma(ddp0, sig)
-
-            for i_r in 1:nr
-                @test r[i_r] == ddp0.R[i_r, sig[i_r]]
-                for i_c in 1:length(sig)
-                    @test vec(q[i_c, :]) == vec(ddp0.Q[i_c, sig[i_c], :])
-                end
-            end
-        end
+    @testset "RQ_sigma" for sig in ([1, 1], [1, 2], [2, 1], [2, 2])
+        r, q = RQ_sigma(ddp0, sig)
+        for (i, s) in enumerate(sig)
+            @test r[i] == ddp0.R[s, i]
+            @test vec(q[i, :]) == vec(ddp0.Q[:, i, s])
+        end
 
         # TODO: add test for DDPsa
     end
 
-    @testset "compute_greedy methods" begin
+    @testset "compute_greedy methods" for ddp in ddp0_collection
        # Check both Dense and State-Action Pair Formulation
-        for ddp in ddp0_collection
-            @test compute_greedy(ddp, v_star) == sigma_star
-        end
+        @test compute_greedy(ddp, v_star) == sigma_star
    end
 
-    @testset "evaluate_policy methods" begin
+    @testset "evaluate_policy methods" for ddp in ddp0_collection
        # Check both Dense and State-Action Pair Formulation
-        for ddp in ddp0_collection
-            @test isapprox(evaluate_policy(ddp, sigma_star), v_star)
-        end
+        @test isapprox(evaluate_policy(ddp, sigma_star), v_star)
    end
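Aside (not part of the patch): for concreteness, the values the reworked `RQ_sigma` should return for the Puterman example above under the policy that always picks action 1, assuming `ddp0` is in scope; the numbers follow directly from the `R` and `Q` arrays defined in this test.

sigma = [1, 1]                      # take action 1 in both states
R_sigma, Q_sigma = RQ_sigma(ddp0, sigma)

R_sigma   # [5.0, -1.0]: reward of the chosen action in each state
Q_sigma   # [0.5 0.5; 0.0 1.0]: row s is the next-state distribution under sigma[s]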
 
-    @testset "methods for subtypes != (Float64, Int)" begin
-        float_types = [Float16, Float32, Float64, BigFloat]
-        int_types = [Int8, Int16, Int32, Int64, Int128,
-                     UInt8, UInt16, UInt32, UInt64, UInt128]
-
-        for ddp in ddp0_collection
-            for f in (bellman_operator, compute_greedy)
-                for T in float_types
-                    f_f64 = f(ddp, [1.0, 1.0])
-                    f_T = f(ddp, ones(T, 2))
-                    @test isapprox(f_f64, convert(Vector{eltype(f_f64)}, f_T))
-                end
-
-                # only Integer subtypes can be Rational type params
-                # NOTE: Only the integer types below don't overflow for this example
-                for T in [Int64, Int128]
-                    @test f(ddp, [1//1, 1//1]) == f(ddp, ones(Rational{T}, 2))
-                end
-            end
-
-            for T in float_types, S in int_types
-                v = ones(T, 2)
-                s = ones(S, 2)
-                # just test that we can call the method and the result is
-                # deterministic
-                @test bellman_operator!(ddp, v, s) == bellman_operator!(ddp, v, s)
-            end
-
-            for T in int_types
-                s = T[1, 1]
-                @test isapprox(evaluate_policy(ddp, s), v_star)
-            end
-        end
-    end
+    float_types = (Float16, Float32, Float64, BigFloat)
+    int_types = (Int8, Int16, Int32, Int64, Int128,
+                 UInt8, UInt16, UInt32, UInt64, UInt128)
+
+    @testset "methods for subtypes != (Float64, Int)" for ddp in ddp0_collection
+        for f in (bellman_operator, compute_greedy)
+            for T in float_types
+                f_f64 = f(ddp, [1.0, 1.0])
+                f_T = f(ddp, ones(T, 2))
+                @test isapprox(f_f64, convert(Vector{eltype(f_f64)}, f_T))
+            end
+
+            # only Integer subtypes can be Rational type params
+            # NOTE: Only the integer types below don't overflow for this example
+            for T in (Int64, Int128)
+                @test f(ddp, [1//1, 1//1]) == f(ddp, ones(Rational{T}, 2))
+            end
+        end
+
+        for T in float_types, S in int_types
+            v = ones(T, 2)
+            s = ones(S, 2)
+            # just test that we can call the method and the result is
+            # deterministic
+            @test bellman_operator!(ddp, v, s) == bellman_operator!(ddp, v, s)
+        end
+
+        for T in int_types
+            s = T[1, 1]
+            @test isapprox(evaluate_policy(ddp, s), v_star)
+        end
+    end
 
     @testset "compute_greedy! changes ddpr.v" begin
@@ -128,46 +118,39 @@ Tests for markov/ddp.jl
         @test maxabs(res.Tv - 500.0) > 0
     end
 
-    @testset "value_iteration" begin
+    @testset "value_iteration" for ddp_item in ddp0_collection
         # Check both Dense and State-Action Pair Formulation
-        for ddp_item in ddp0_collection
-            # Compute Result
-            res = solve(ddp_item, VFI)
-            v_init = [0.0, 0.0]
-            res_init = solve(ddp_item, v_init, VFI; epsilon=epsilon)
-
-            # Check v is an epsilon/2-approxmation of v_star
-            @test maxabs(res.v - v_star) < epsilon/2
-            @test maxabs(res_init.v - v_star) < epsilon/2
-
-            # Check sigma == sigma_star.
-            # NOTE we need to convert from linear to row-by-row index
-            @test res.sigma == sigma_star
-            @test res_init.sigma == sigma_star
-        end
+        # Compute Result
+        res = solve(ddp_item, VFI)
+        v_init = [0.0, 0.0]
+        res_init = solve(ddp_item, v_init, VFI; epsilon=epsilon)
+
+        # Check v is an epsilon/2-approximation of v_star
+        @test maxabs(res.v - v_star) < epsilon/2
+        @test maxabs(res_init.v - v_star) < epsilon/2
+
+        # Check sigma == sigma_star.
+        # NOTE we need to convert from linear to row-by-row index
+        @test res.sigma == sigma_star
+        @test res_init.sigma == sigma_star
    end
 
-    @testset "policy_iteration" begin
+    @testset "policy_iteration" for ddp in ddp0_collection
        # Check both Dense and State-Action Pair Formulation
-        for ddp_item in ddp0_collection
-            res = solve(ddp_item, PFI)
-            v_init = [0.0, 1.0]
-            res_init = solve(ddp_item, v_init, PFI)
-
-            # Check v == v_star
-            @test isapprox(res.v, v_star)
-            @test isapprox(res_init.v, v_star)
-
-            # Check sigma == sigma_star
-            @test res.sigma == sigma_star
-            @test res_init.sigma == sigma_star
-        end
+        res = solve(ddp, PFI)
+        v_init = [0.0, 1.0]
+        res_init = solve(ddp, v_init, PFI)
+
+        # Check v == v_star
+        @test isapprox(res.v, v_star)
+        @test isapprox(res_init.v, v_star)
+
+        # Check sigma == sigma_star
+        @test res.sigma == sigma_star
+        @test res_init.sigma == sigma_star
    end
 
     @testset "DiscreteDP{Rational,_,_,Rational} maintains Rational" begin
-        ddp_rational = DiscreteDP(map(Rational{BigInt}, R),
-                                  map(Rational{BigInt}, Q),
-                                  map(Rational{BigInt}, beta))
         # do minimal number of iterations to avoid overflow
         vi = Rational{BigInt}[1//2, 1//2]
         @test eltype(solve(ddp_rational, VFI; max_iter=1, epsilon=Inf).v) == Rational{BigInt}
@@ -175,10 +158,8 @@ Tests for markov/ddp.jl
         @test eltype(solve(ddp_rational, vi, MPFI; max_iter=1, k=1, epsilon=Inf).v) == Rational{BigInt}
     end
 
     @testset "DiscreteDP{Rational{BigInt},_,_,Rational{BigInt}} works" begin
-        ddp_rational = DiscreteDP(map(Rational{BigInt}, R),
-                                  map(Rational{BigInt}, Q),
-                                  map(Rational{BigInt}, beta))
+
         # do minimal number of iterations to avoid overflow
         r1 = solve(ddp_rational, PFI)
         r2 = solve(ddp_rational, MPFI)
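Aside (not part of the patch): a hedged sketch of the solver interface these tests exercise, assuming `ddp` is the example model built earlier; `solve`, the algorithm tags `VFI`/`PFI`/`MPFI`, the keyword arguments, and the result fields are the ones used throughout this test file.

res_vfi  = solve(ddp, VFI; epsilon=1e-4)   # value function iteration
res_pfi  = solve(ddp, PFI)                 # policy function iteration
res_mpfi = solve(ddp, MPFI; k=20)          # modified policy iteration

# a DPSolveResult carries (at least) the value function, the greedy policy
# and the controlled Markov chain
res_pfi.v      # approximately v_star
res_pfi.sigma  # == sigma_star
res_pfi.mc     # MarkovChain under the computed policy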
@@ -190,30 +171,28 @@ Tests for markov/ddp.jl
         @test r1.mc.p == r3.mc.p
     end
 
-    @testset "modified_policy_iteration" begin
-        for ddp_item in ddp0_collection
-            res = solve(ddp_item, MPFI)
-            v_init = [0.0, 1.0]
-            res_init = solve(ddp_item, v_init, MPFI)
+    @testset "modified_policy_iteration" for ddp_item in ddp0_collection
+        res = solve(ddp_item, MPFI)
+        v_init = [0.0, 1.0]
+        res_init = solve(ddp_item, v_init, MPFI)
 
-            # Check v is an epsilon/2-approxmation of v_star
-            @test maxabs(res.v - v_star) < epsilon/2
-            @test maxabs(res_init.v - v_star) < epsilon/2
+        # Check v is an epsilon/2-approximation of v_star
+        @test maxabs(res.v - v_star) < epsilon/2
+        @test maxabs(res_init.v - v_star) < epsilon/2
 
-            # Check sigma == sigma_star
-            @test res.sigma == sigma_star
-            @test res_init.sigma == sigma_star
+        # Check sigma == sigma_star
+        @test res.sigma == sigma_star
+        @test res_init.sigma == sigma_star
 
-            #Test Modified Policy Iteration k0
-            k = 0
-            res = solve(ddp_item, MPFI; max_iter=max_iter, epsilon=epsilon, k=k)
+        # Test modified policy iteration with k = 0
+        k = 0
+        res = solve(ddp_item, MPFI; max_iter=max_iter, epsilon=epsilon, k=k)
 
-            # Check v is an epsilon/2-approxmation of v_star
-            @test maxabs(res.v - v_star) < epsilon/2
+        # Check v is an epsilon/2-approximation of v_star
+        @test maxabs(res.v - v_star) < epsilon/2
 
-            # Check sigma == sigma_star
-            @test res.sigma == sigma_star
-        end
+        # Check sigma == sigma_star
+        @test res.sigma == sigma_star
     end
 
     @testset "DDPsa constructor" begin
@@ -264,9 +243,9 @@ Tests for markov/ddp.jl
     @testset "feasbile action pair" begin
         #Dense Matrix
         n, m = 2, 2
-        _R = [-Inf -Inf; 1.0 2.0]
+        _R = [-Inf -Inf; 1.0 2.0]'
 
-        _Q = Array(Float64, n, m, n)
+        _Q = Array{Float64}(n, m, n)
         _Q[:, :, 1] = [0.5 0.0; 0.0 0.0]
         _Q[:, :, 2] = [0.5 1.0; 1.0 1.0]
         _beta = 0.95
@@ -290,9 +269,7 @@ Tests for markov/ddp.jl
     @testset "ddp_negative_inf_error()" begin
         # Dense Matrix
         n, m = 3, 2
-        R = [0 1;
-             0 -Inf;
-             -Inf -Inf]
+        R = [0 1; 0 -Inf; -Inf -Inf]
         Q = fill(1.0/n, n, m, n)
         beta = 0.95
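Aside (not part of the patch): a small sketch of the feasibility rule the `ddp_negative_inf_error()` test targets, assuming the reoriented inputs (R transposed to actions x states, Q to s' x s x a) and `Base.Test` in scope. A state whose reward is -Inf for every action should make the patched constructor throw an `ArgumentError`.

using Base.Test

n, m = 3, 2
R = [0.0 1.0; 0.0 -Inf; -Inf -Inf]   # state 3 has no feasible action
Q = fill(1.0/n, n, m, n)             # uniform transitions in the old (s, a, s') layout

# the reworked constructor should reject this model on the feasibility check
@test_throws ArgumentError DiscreteDP(R', Q', 0.95)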