;;; -*- Mode: Lisp; Syntax: Common-Lisp; -*- File: search/algorithms/minimax ;;;; Deciding What Move to Make in a Game by Minimax or Alpha-Beta Search ;;; The minimax decision procedure returns the optimal move in the game ;;; using exhaustive generation of the entire game tree. Implementation ;;; uses the fact that the evaluation and utility functions return a list of ;;; values from the point of view of each player, with the "current" player ;;; first. Hence, rather than using #'min, we always use #'max for the ;;; current player. A successor value is passed up the tree using ;;; right-rotation. This works for any number of players. ;;; The notation "a+s" means an (action . state) pair. (defun minimax-decision (state game) "Return the best action, according to backed-up evaluation." (car (the-biggest #'(lambda (a+s) (first (right-rotate (minimax-value (cdr a+s) game)))) (game-successors state game)))) (defun minimax-value (state game) (or (funcall (game-terminal-test game) state) (right-rotate (the-biggest #'(lambda (values) (first (right-rotate values))) (mapcar #'(lambda (a+s) (minimax-value (cdr a+s) game)) (game-successors state game)))))) (defun minimax-cutoff-decision (state game eval-fn limit) "Return the best action, according to backed-up evaluation down to LIMIT." (car (the-biggest #'(lambda (a+s) (first (right-rotate (minimax-cutoff-value (cdr a+s) game eval-fn (1- limit))))) (game-successors state game)))) (defun minimax-cutoff-value (state game eval-fn limit) (or (funcall (game-terminal-test game) state) (when (<= 0 limit) (funcall eval-fn state)) (right-rotate (the-biggest #'(lambda (values) (first (right-rotate values))) (mapcar #'(lambda (a+s) (minimax-cutoff-value (cdr a+s) game eval-fn (1- limit))) (game-successors state game)))))) ;;;; Alpha-Beta Search ;;; The alpha-beta decision procedure returns the optimal move according to ;;; a limited-depth search using the evaluation function. Implementation ;;; uses the fact that the evaluation and utility functions return a list of ;;; values from the point of view of each player, with the "current" player ;;; first. Hence, rather than using #'min, we always use #'max for the ;;; current player. A successor value is passed up the tree using ;;; right-rotation. This version of alpha-beta works only for two players, ;;; and requires that the game is "zero-sum", i.e., the evaluation for one ;;; player is the opposite of the evaluation for the other. (defun alpha-beta-decision (state game eval-fn &optional (limit 10)) (car (the-biggest #'(lambda (a+s) (first (right-rotate (alpha-value (cdr a+s) game (game-worst game) (game-worst game) eval-fn (1- limit))))) (game-successors state game)))) (defun alpha-value (state game alpha beta eval-fn limit) (or (funcall (game-terminal-test game) state) (when (zerop limit) (funcall eval-fn state)) (dolist (a+s (game-successors state game) (list alpha (- alpha))) (setq alpha (max alpha (first (right-rotate (beta-value (cdr a+s) game alpha beta eval-fn (1- limit)))))) (when (>= alpha (- beta)) (return (list (- beta) beta)))))) (defun beta-value (state game alpha beta eval-fn limit) (or (funcall (game-terminal-test game) state) (when (zerop limit) (funcall eval-fn state)) (dolist (a+s (game-successors state game) (list beta (- beta))) (setq beta (max beta (first (right-rotate (alpha-value (cdr a+s) game alpha beta eval-fn (1- limit)))))) (when (>= beta (- alpha)) (return (list (- alpha) alpha))))))