;;; uncertainty/domains/mdp.lisp
;;; Definitions for Markov decision processes (MDPs).
;;; An MDP is defined by an initial state, a transition model, rewards, and
;;; distinguished terminal states. The model and rewards are hash tables
;;; indexed by state (after application of the hash-key function).
;;; The entries in the model are alists keyed by action; each action is
;;; associated with an action model: basically a list of transitions.
;;; Markov chains (i.e., stochastic processes with no distinct agent)
;;; can be defined by allowing only a no-op action in the MDP.

(defstruct mdp
  initial-state          ;;; The initial state for the problem
  model                  ;;; Hash table describing transition probabilities
  rewards                ;;; Hash table of rewards for each state
  terminal-states        ;;; List of terminal states
  (hash-key #'identity)  ;;; To convert states into hash keys
  name)                  ;;; String, identifies the problem

(defstruct (mdp-action-model (:type list))
  (transitions nil)
  (times-executed 0))

(defstruct (transition (:type list))
  destination
  probability
  (times-achieved 0))

;;; (transitions a s M) returns the transitions resulting from executing
;;; action a in state s according to model M. (actions s M) returns the
;;; list of actions feasible in state s according to model M.

(defun action-model (a s M)
  (cdr (assoc a (gethash s M) :test #'eq)))

(defun transitions (a s M)
  (mdp-action-model-transitions (action-model a s M)))

(defun actions (s M)
  (mapcar #'car (gethash s M)))
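
;;; A minimal usage sketch (not part of the original definitions above):
;;; it builds a hypothetical two-state chain with a single :advance action
;;; to show how the model hash table is populated and how the accessors
;;; read it back. State names, the action name, and probabilities here are
;;; illustrative assumptions only.

(defun make-example-mdp ()
  (let ((model (make-hash-table :test #'eql)))
    ;; From state S1, :advance reaches S2 with probability 0.8 and
    ;; stays in S1 with probability 0.2.
    (setf (gethash 's1 model)
          (list (cons :advance
                      (make-mdp-action-model
                       :transitions
                       (list (make-transition :destination 's2 :probability 0.8)
                             (make-transition :destination 's1 :probability 0.2))))))
    (make-mdp :initial-state 's1
              :model model
              :terminal-states '(s2)
              :name "example-chain")))

;;; Example queries against the sketch above:
;;;   (actions 's1 (mdp-model (make-example-mdp)))
;;;     => (:ADVANCE)
;;;   (transitions :advance 's1 (mdp-model (make-example-mdp)))
;;;     => ((S2 0.8 0) (S1 0.2 0))
;;; Because both structs use (:type list), each transition prints as a
;;; plain list (destination probability times-achieved).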