Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Parsing grammars using OCaml

I have a task to write a (toy) parser for a (toy) grammar using OCaml and not sure how to start (and proceed with) this problem.

Here's a sample Awk grammar:

type ('nonterm, 'term) symbol = N of 'nonterm | T of 'term;;

type awksub_nonterminals = Expr | Term | Lvalue | Incrop | Binop | Num;;

let awksub_grammar =
  (Expr,
   function
     | Expr ->
         [[N Term; N Binop; N Expr];
          [N Term]]
     | Term ->
     [[N Num];
      [N Lvalue];
      [N Incrop; N Lvalue];
      [N Lvalue; N Incrop];
      [T"("; N Expr; T")"]]
     | Lvalue ->
     [[T"$"; N Expr]]
     | Incrop ->
     [[T"++"];
      [T"--"]]
     | Binop ->
     [[T"+"];
      [T"-"]]
     | Num ->
     [[T"0"]; [T"1"]; [T"2"]; [T"3"]; [T"4"];
      [T"5"]; [T"6"]; [T"7"]; [T"8"]; [T"9"]]);;

And here's some fragments to parse:

let frag1 = ["4"; "+"; "3"];;
let frag2 = ["9"; "+"; "$"; "1"; "+"];;

What I'm looking for is a rulelist that is the result of the parsing a fragment, such as this one for frag1 ["4"; "+"; "3"]:

 [(Expr, [N Term; N Binop; N Expr]);
  (Term, [N Num]);
  (Num, [T "3"]);
  (Binop, [T "+"]);
  (Expr, [N Term]);
  (Term, [N Num]);
  (Num, [T "4"])]

The restriction is to not use any OCaml libraries other than List... :/

like image 711
DV. Avatar asked Oct 18 '09 07:10

DV.


2 Answers

Here is a rough sketch - straightforwardly descend into the grammar and try each branch in order. Possible optimization : tail recursion for single non-terminal in a branch.

exception Backtrack

let parse l =
  let rules = snd awksub_grammar in
  let rec descend gram l =
    let rec loop = function 
      | [] -> raise Backtrack
      | x::xs -> try attempt x l with Backtrack -> loop xs
    in
    loop (rules gram)
  and attempt branch (path,tokens) =
    match branch, tokens with
    | T x :: branch' , h::tokens' when h = x -> 
        attempt branch' ((T x :: path),tokens')
    | N n :: branch' , _ -> 
        let (path',tokens) = descend n ((N n :: path),tokens) in 
        attempt branch' (path', tokens)
    | [], _ -> path,tokens
    | _, _ -> raise Backtrack
  in
  let (path,tail) = descend (fst awksub_grammar) ([],l) in
  tail, List.rev path
like image 69
ygrek Avatar answered Oct 22 '22 04:10

ygrek


I'm not sure if you specifically require the derivation tree, or if this is a just a first step in parsing. I'm assuming the latter.

You could start by defining the structure of the resulting abstract syntax tree by defining types. It could be something like this:

type expr =
    | Operation of term * binop * term
    | Term of term
and term =
    | Num of num
    | Lvalue of expr
    | Incrop of incrop * expression
and incrop = Incr | Decr
and binop = Plus | Minus
and num = int

Then I'd implement a recursive descent parser. Of course it would be much nicer if you could use streams combined with the preprocessor camlp4of...

By the way, there's a small example about arithmetic expressions in the OCaml documentation here.

like image 23
jdb Avatar answered Oct 22 '22 05:10

jdb