;;; -*- Mode: Lisp; Syntax: Common-Lisp; -*- File: language/dcg.lisp ;;;; DCG (Definite Clause Grammar) Parser (defun parse (words &key (cat 'S) (as `(,cat $x))) "Find one parse for the list of words. You can specify the category to parse, but if the category takes multiple args, use the :AS keyword." (prologx as `(,@as ,(consify words) ()))) (defun parses (words &key (cat 'S) (as `(,cat $x))) "Find all the parses for the words. You can specify the category to parse, but if the category takes multiple args, use the :AS keyword." (remove-duplicates (prologs as `(,@as ,(consify words) ())) :test #'equal)) (defun say (sem) "Generate a sentence that represents this semantic form." (unconsify (prologx '$words `(S ,sem $words nil)))) (defun define-language (&key name grammar lexicon) "Defines a language in terms of a grammar and lexicon using DCG rules. GRAMMAR is a list of rules of the form: ((Lhs arg...) -> (Rhs arg...) ...) Within the right-hand-side, each literal can be either: 'word Parse a single word (:test (P)...) Don't parse anything, but execute (P)... (Cat arg...) Parse this category with these arguments. This is exactly like normal Prolog DCG rules, except in Lisp notation. LEXICON is a list of rules of the form: ((Cat $x...) => (word1 arg...) (word2 arg...) ...). This is equivalent to a single DCG rule and a list of assertion: ((Cat $x...) -> '$word (:test (Lex_Cat $word $x...))) (Lex_Cat word1 arg...) (Lex_Cat word2 arg...) ... If there are no args, you can use either (Cat) or just Cat. This function deletes the old language definition (and everything else in the theory) unless EMPTY is NIL." (dolist (rule grammar) (save (dcg->prolog rule))) (dolist (rule lexicon) (save-lexical-rule rule))) (defun save-lexical-rule (rule) "Convert a lexical rule into Prolog assertions and save them." (let ((cat (if (listp (first rule)) (first rule) (list (first rule))))) (save `(<= (,@cat (cons $word $S_1) $S_1) (,(make-lex (first cat)) $word ,@(rest cat)))) (dolist (word (subseq rule 2)) (let ((args (if (atom word) (make-list (length cat) :initial-element word) word))) (save `(,(make-lex (first cat)) ,@args)))))) (defun dcg->prolog (rule) "Convert a DCG rule of the form (Lhs -> Rhs...) to a prolog rule of the form (<= (Lhs $s0 $sn) (Rhs1 $s0 $s1) ... (Rhsn $s5 $sn))." (let* (($s0 ($s :init 0)) (new-rhs (mapcar #'convert-dcg-literal (subseq rule 2))) (new-lhs `(,@(first rule) ,$s0 ,($s :inc 0)))) `(<= ,new-lhs ,@new-rhs))) (defun convert-dcg-literal (literal) "Convert a literal on the rhs or lhs of a DCG rule into a Prolog literal." (let (($s1 ($s :inc 0)) (args (rest literal))) (case (first literal) (:test (let ((tests args)) (if (= (length tests) 1) (first tests) (cons 'and tests)))) (quote `(= ,$s1 (,@args . ,($s)))) (t `(,@literal ,$s1 ,($s)))))) ;;; Note that the symbols that are created by the program, $S_0, $S_1, ... ;;; and LEX_NOUN, LEX_VERB, ... have an underscore in them. The idea is ;;; to avoid name clashes with the user's variable names. (defvar *$s-counter* 0 "Used to keep track of which $s_i symbol to make next.") (defun $s (&key (init *$s-counter*) (inc 1)) "Return a symbol like $s_0, $s_1, ..." (intern (format nil "$S_~d" (setq *$s-counter* (+ init inc))))) (defun make-lex (cat) "Make a new symbol for this lexical category." (intern (format nil "LEX_~a" cat))) (defun consify (list) "Given (a b c), return (cons a (cons b (cons c nil)))." (if (null list) nil (list 'cons (first list) (consify (rest list))))) (defun unconsify (list) "Given (cons a (cons b (cons c nil))), return (a b c)." (if (null list) nil (cons (second list) (unconsify (third list))))) (defun test-parse (&optional (sentences *sentences*)) "Run some test sentences, and count how many were not parsed." (count-if-not #'(lambda (s) (format t "~2&>>> ~(~{~a ~}~)~%" s) (write (second (parse s)) :pretty t)) sentences)) (defun test-say (&optional (sentences *sentences*)) "See if SAY can generate the sentences that PARSE can parse." (count-if-not #'(lambda (s) (let ((say-s (say (second (parse s))))) (format t "~&~(~a~)~40T~a" s say-s) (equal s say-s))) sentences))