;;; -*- Mode: Lisp; Syntax: Common-Lisp; -*- File: search/algorithms/minimax

;;;; Deciding What Move to Make in a Game by Minimax or Alpha-Beta Search

;;; The minimax decision procedure returns the optimal move in the game
;;; using exhaustive generation of the entire game tree.  Implementation
;;; uses the fact that the evaluation and utility functions return a list of
;;; values from the point of view of each player, with the "current" player
;;; first. Hence, rather than using #'min, we always use #'max for the
;;; current player.  A successor value is passed up the tree using
;;; right-rotation.  This works for any number of players.

;;; The notation "a+s" means an (action . state) pair.

(defun minimax-decision (state game)
  "Return the best action, according to backed-up evaluation."
  (car (the-biggest
        #'(lambda (a+s) (first (right-rotate (minimax-value (cdr a+s) game))))
        (game-successors state game))))

(defun minimax-value (state game)
  (or (funcall (game-terminal-test game) state)
      (right-rotate
       (the-biggest
        #'(lambda (values) (first (right-rotate values)))
        (mapcar #'(lambda (a+s) (minimax-value (cdr a+s) game))
                (game-successors state game))))))

(defun minimax-cutoff-decision (state game eval-fn limit)
  "Return the best action, according to backed-up evaluation down to LIMIT."
  (car (the-biggest
        #'(lambda (a+s) 
            (first (right-rotate 
                    (minimax-cutoff-value (cdr a+s) game eval-fn (1- limit)))))
        (game-successors state game))))

(defun minimax-cutoff-value (state game eval-fn limit)
  (or (funcall (game-terminal-test game) state)
      (when (<= 0 limit)
        (funcall eval-fn  state))
      (right-rotate
       (the-biggest
        #'(lambda (values) (first (right-rotate values)))
        (mapcar #'(lambda (a+s) 
                    (minimax-cutoff-value (cdr a+s) game eval-fn (1- limit)))
                (game-successors state game))))))

;;;; Alpha-Beta Search

;;; The alpha-beta decision procedure returns the optimal move according to
;;; a limited-depth search using the evaluation function.  Implementation
;;; uses the fact that the evaluation and utility functions return a list of
;;; values from the point of view of each player, with the "current" player
;;; first. Hence, rather than using #'min, we always use #'max for the
;;; current player.  A successor value is passed up the tree using
;;; right-rotation.  This version of alpha-beta works only for two players,
;;; and requires that the game is "zero-sum", i.e., the evaluation for one
;;; player is the opposite of the evaluation for the other.

(defun alpha-beta-decision (state game eval-fn &optional (limit 10))
  (car (the-biggest
        #'(lambda (a+s) 
            (first (right-rotate 
                    (alpha-value (cdr a+s) game
				 (game-worst game) (game-worst game)
                                 eval-fn (1- limit)))))
        (game-successors state game))))

(defun alpha-value (state game alpha beta eval-fn limit)
  (or (funcall (game-terminal-test game) state)
      (when (zerop limit)
        (funcall eval-fn state))
      (dolist (a+s (game-successors state game)
                   (list alpha (- alpha)))
        (setq alpha (max alpha
                         (first (right-rotate
                                 (beta-value (cdr a+s) game alpha beta eval-fn
					     (1- limit))))))
        (when (>= alpha (- beta))
          (return (list (- beta) beta))))))

(defun beta-value (state game alpha beta eval-fn limit)
  (or (funcall (game-terminal-test game) state)
      (when (zerop limit)
        (funcall eval-fn state))
      (dolist (a+s (game-successors state game)
                   (list beta (- beta)))
        (setq beta (max beta
			(first (right-rotate
				(alpha-value (cdr a+s) game alpha beta eval-fn
					     (1- limit))))))
        (when (>= beta (- alpha))
          (return (list (- alpha) alpha))))))