;;; -*- Mode: Lisp; Syntax: Common-Lisp; -*-

;;; Environment definitions for Markov decision problems and
;;; reinforcement learning environments.  Currently the easiest
;;; way to make an environment is to construct an MDP and then
;;; convert it to an environment using mdp->environment.

(defstruct (mdp-environment (:print-function print-environment)
                            (:include environment))
  ;; Number of epochs remaining; the environment terminates when this
  ;; reaches 0.  Defaults to 1 so a freshly converted environment runs
  ;; for one epoch instead of terminating immediately.
  (epochs-left 1))

(defstruct (mdp-percept (:type list))
  "A percept gives the current state and the reward received."
  state reward terminalp)

(defun mdp->environment (mdp &key agent (name (mdp-name mdp)))
  "Convert an MDP into an environment for the given agent.  Each epoch
  runs from the MDP's initial state until a terminal state is reached."
  (make-mdp-environment
   ;; The percept reports the current state, its reward, and whether
   ;; the state is terminal.
   :percept-fn
   #'(lambda (agent env)
       (declare (ignore agent))
       (let* ((state (environment-state env))
              (state-key (funcall (mdp-hash-key mdp) state)))
         (make-mdp-percept
          :state state
          :reward (gethash state-key (mdp-rewards mdp))
          :terminalp (not (null (member state (mdp-terminal-states mdp)
                                        :test #'equal))))))
   ;; On reaching a terminal state, finish the epoch and restart from
   ;; the initial state; otherwise sample the next state from the
   ;; transition model given the agent's chosen action.
   :update-fn
   #'(lambda (env)
       (cond ((member (environment-state env) (mdp-terminal-states mdp)
                      :test #'equal)
              (decf (mdp-environment-epochs-left env))
              (setf (environment-state env) (mdp-initial-state mdp)))
             (t (setf (environment-state env)
                      (mdp-next-state (agent-action agent)
                                      (environment-state env)
                                      mdp)))))
   :termination-fn
   #'(lambda (env) (<= (mdp-environment-epochs-left env) 0))
   :state (mdp-initial-state mdp)
   :agents (list agent)
   :name name))

(defun mdp-next-state (action state mdp
                       &aux (state-key (funcall (mdp-hash-key mdp) state)))
  "Sample a successor of STATE under ACTION from the MDP's transition model."
  (random-transition
   (mdp-transitions action (gethash state-key (mdp-model mdp)))))

(defun mdp-transitions (action state-model)
  "Return the list of transitions for ACTION in STATE-MODEL, an alist
  mapping actions to action models."
  (mdp-action-model-transitions
   (cdr (assoc action state-model :test #'equal))))

(defun random-transition (transitions &aux (r (random 1.0)))
  "Pick a transition at random, weighted by its probability: draw r in
  [0,1) and walk down the list, subtracting each transition's
  probability from r until r is used up."
  (dolist (transition transitions)
    (decf r (transition-probability transition))
    (unless (plusp r)
      (return (transition-destination transition)))))
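
;;; Example: a minimal sketch of how the pieces above fit together.
;;; This assumes the agent structure (with its program slot), make-agent,
;;; and run-environment from the rest of the AIMA code base, plus some
;;; MDP instance to pass in; none of those are defined in this file.
;;; Only the MDP accessors already used above are relied on here.

(defun random-mdp-agent-program (mdp)
  "Return an agent program (a function from percept to action) that
  picks a uniformly random action among those legal in the percept's
  state."
  #'(lambda (percept)
      (let* ((state (mdp-percept-state percept))
             (state-key (funcall (mdp-hash-key mdp) state))
             ;; The model maps a state key to an alist of
             ;; (action . action-model) pairs, so the cars are the
             ;; legal actions in that state.
             (actions (mapcar #'car (gethash state-key (mdp-model mdp)))))
        (if actions
            (nth (random (length actions)) actions)
            nil))))    ; no legal actions, e.g. a terminal state

;;; Typical use, where my-mdp stands for some MDP instance (a
;;; hypothetical name, not defined here):
;;;   (run-environment
;;;    (mdp->environment my-mdp
;;;                      :agent (make-agent :program
;;;                                         (random-mdp-agent-program my-mdp))))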