;;; -*- Mode: Lisp; Syntax: Common-Lisp; -*-

;;; Environment definitions for Markov decision problems and
;;; reinforcement learning environments.  Currently the easiest
;;; way to make an environment is to construct an MDP and then
;;; convert it to an environment using mdp->environment.

(defstruct (mdp-environment (:print-function print-environment)
                            (:include environment))
  ;; Number of epochs remaining; the environment terminates when this
  ;; reaches 0.  Defaults to 1 so a freshly converted environment runs
  ;; for one epoch instead of terminating immediately.
  (epochs-left 1))

(defstruct (mdp-percept (:type list))
  "A percept gives the current state and the reward received."
  state reward terminalp)

(defun mdp->environment (mdp &key agent (name (mdp-name mdp)))
  "Convert an MDP into an environment for the given agent.  Each epoch
  runs from the MDP's initial state until a terminal state is reached."
  (make-mdp-environment
   ;; The percept reports the current state, its reward, and whether
   ;; the state is terminal.
   :percept-fn
   #'(lambda (agent env)
       (declare (ignore agent))
       (let* ((state (environment-state env))
              (state-key (funcall (mdp-hash-key mdp) state)))
         (make-mdp-percept
          :state state
          :reward (gethash state-key (mdp-rewards mdp))
          :terminalp (not (null (member state (mdp-terminal-states mdp)
                                        :test #'equal))))))
   ;; On reaching a terminal state, finish the epoch and restart from
   ;; the initial state; otherwise sample the next state from the
   ;; transition model given the agent's chosen action.
   :update-fn
   #'(lambda (env)
       (cond ((member (environment-state env) (mdp-terminal-states mdp)
                      :test #'equal)
              (decf (mdp-environment-epochs-left env))
              (setf (environment-state env) (mdp-initial-state mdp)))
             (t (setf (environment-state env)
                      (mdp-next-state (agent-action agent)
                                      (environment-state env)
                                      mdp)))))
   :termination-fn
   #'(lambda (env) (<= (mdp-environment-epochs-left env) 0))
   :state (mdp-initial-state mdp)
   :agents (list agent)
   :name name))

(defun mdp-next-state (action state mdp
                       &aux (state-key (funcall (mdp-hash-key mdp) state)))
  "Sample a successor of STATE under ACTION from the MDP's transition model."
  (random-transition
   (mdp-transitions action (gethash state-key (mdp-model mdp)))))

(defun mdp-transitions (action state-model)
  "Return the list of transitions for ACTION in STATE-MODEL, an alist
  mapping actions to action models."
  (mdp-action-model-transitions
   (cdr (assoc action state-model :test #'equal))))

(defun random-transition (transitions &aux (r (random 1.0)))
  "Pick a transition at random, weighted by its probability: draw r in
  [0,1) and walk down the list, subtracting each transition's
  probability from r until r is used up."
  (dolist (transition transitions)
    (decf r (transition-probability transition))
    (unless (plusp r)
      (return (transition-destination transition)))))
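
;;; Example: a minimal sketch of how the pieces above fit together.
;;; This assumes the agent structure (with its program slot), make-agent,
;;; and run-environment from the rest of the AIMA code base, plus some
;;; MDP instance to pass in; none of those are defined in this file.
;;; Only the MDP accessors already used above are relied on here.

(defun random-mdp-agent-program (mdp)
  "Return an agent program (a function from percept to action) that
  picks a uniformly random action among those legal in the percept's
  state."
  #'(lambda (percept)
      (let* ((state (mdp-percept-state percept))
             (state-key (funcall (mdp-hash-key mdp) state))
             ;; The model maps a state key to an alist of
             ;; (action . action-model) pairs, so the cars are the
             ;; legal actions in that state.
             (actions (mapcar #'car (gethash state-key (mdp-model mdp)))))
        (if actions
            (nth (random (length actions)) actions)
            nil))))    ; no legal actions, e.g. a terminal state

;;; Typical use, where my-mdp stands for some MDP instance (a
;;; hypothetical name, not defined here):
;;;   (run-environment
;;;    (mdp->environment my-mdp
;;;                      :agent (make-agent :program
;;;                                         (random-mdp-agent-program my-mdp))))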