Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] add a CST structure #306

Draft
wants to merge 12 commits into
base: master
Choose a base branch
from
Draft
38 changes: 38 additions & 0 deletions src/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,44 @@ module Impl = struct

type attributes = (string * string) list
type doc = attributes block list

let rec of_cst_block (blk : Cst.Impl.attributes Cst.Impl.block) :
attributes block =
match blk with
| Cst.Impl.Paragraph (attr, inline) ->
Paragraph (attr, Ast_inline.of_cst_inline inline)
| Cst.Impl.List (attr, list_type, list_spacing, blk) ->
List
( attr
, list_type
, list_spacing
, blk |> List.map (List.map of_cst_block) )
| Cst.Impl.Blockquote (attr, blk) ->
Blockquote (attr, blk |> List.map of_cst_block)
| Cst.Impl.Thematic_break atrr -> Thematic_break atrr
| Cst.Impl.Heading (attr, _heading_type, level, inline) ->
Heading (attr, level, Ast_inline.of_cst_inline inline)
| Cst.Impl.Code_block (attr, s1, s2) -> Code_block (attr, s1, s2)
| Cst.Impl.Html_block (atrr, s) -> Html_block (atrr, s)
| Cst.Impl.Definition_list
(attr, (def_list : Cst.Impl.attributes Cst.Impl.def_elt list)) ->
let def_list : attributes def_elt list =
def_list
|> List.map
(fun ({ term; defs } : Cst.Impl.attributes Cst.Impl.def_elt) ->
{ term = Ast_inline.of_cst_inline term
; defs = defs |> List.map Ast_inline.of_cst_inline
})
in
Definition_list (attr, def_list)
| Cst.Impl.Table (attr, b_list, inline) ->
let second =
b_list
|> List.map (fun (inline, cell) ->
(Ast_inline.of_cst_inline inline, cell))
in
let inline = inline |> List.map (List.map Ast_inline.of_cst_inline) in
Table (attr, second, inline)
end

module type Intf = module type of Impl
Expand Down
19 changes: 2 additions & 17 deletions src/ast_block.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,8 @@ module InlineContent = struct
type 'attr t = 'attr Ast_inline.inline
end

module List_types = struct
type list_type =
| Ordered of int * char
| Bullet of char

type list_spacing =
| Loose
| Tight
end

module Table_alignments = struct
type cell_alignment =
| Default
| Left
| Centre
| Right
end
module List_types = Cst_block.List_types
module Table_alignments = Cst_block.Table_alignments

open List_types
open Table_alignments
Expand Down
44 changes: 44 additions & 0 deletions src/ast_inline.ml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,47 @@ and 'attr link =
; destination : string
; title : string option
}

let remove_escape_chars (s : string) : string =
let is_punct = function
| '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ','
| '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\'
| ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' ->
true
| _ -> false
in
let n = String.length s in
let buf = Buffer.create n in
let rec loop i =
if i >= n then Buffer.contents buf
else if s.[i] = '\\' && i + 1 < n && is_punct s.[i + 1] then (
Buffer.add_char buf s.[i + 1];
loop (i + 2))
else (
Buffer.add_char buf s.[i];
loop (i + 1))
in
loop 0

let rec of_cst_inline ?(escape = true) (cst : 'attr Cst_inline.inline) :
'attr inline =
match cst with
| Cst_inline.Strong (attr, _, inline) -> Strong (attr, of_cst_inline inline)
| Cst_inline.Concat (attr, inline) ->
Concat (attr, inline |> List.map of_cst_inline)
| Cst_inline.Text (attr, s) ->
Text (attr, if escape then remove_escape_chars s else s)
| Cst_inline.Emph (attr, _, inline) -> Emph (attr, of_cst_inline inline)
| Cst_inline.Code (attr, s) -> Code (attr, s)
| Cst_inline.Hard_break attr -> Hard_break attr
| Cst_inline.Soft_break attr -> Soft_break attr
| Cst_inline.Link (attr, link_type, { label; destination; title }) ->
Link
( attr
, { label = of_cst_inline ~escape:(link_type = Regular) label
; destination
; title
} )
| Cst_inline.Image (attr, { label; destination; title }) ->
Image (attr, { label = of_cst_inline label; destination; title })
| Cst_inline.Html (attr, s) -> Html (attr, s)
13 changes: 8 additions & 5 deletions src/block_parser.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
open Ast.Util
module Raw = Ast_block.Raw
open Cst.Util
module Raw = Cst_block.Raw

module Pre = struct
type container =
Expand Down Expand Up @@ -139,7 +139,7 @@ module Pre = struct
| Rempty, Lsetext_heading { level = 2; len } when len >= 3 ->
{ blocks = Thematic_break [] :: blocks; next = Rempty }
| Rempty, Latx_heading (level, text, attr) ->
{ blocks = Heading (attr, level, text) :: blocks; next = Rempty }
{ blocks = Heading (attr, Latx, level, text) :: blocks; next = Rempty }
| Rempty, Lfenced_code (ind, num, q, info, a) ->
{ blocks; next = Rfenced_code (ind, num, q, info, [], a) }
| Rempty, Lhtml (_, kind) -> process { blocks; next = Rhtml (kind, []) } s
Expand All @@ -165,7 +165,7 @@ module Pre = struct
| Lfenced_code _
| Lhtml (true, _) ) ) ->
process { blocks = close { blocks; next }; next = Rempty } s
| Rparagraph (_ :: _ as lines), Lsetext_heading { level; _ } ->
| Rparagraph (_ :: _ as lines), Lsetext_heading { level; len } ->
let text = concat (List.map trim_left lines) in
let defs, text = link_reference_definitions text in
link_defs := defs @ !link_defs;
Expand All @@ -179,7 +179,10 @@ module Pre = struct
In that case, there's nothing to make as Heading. We can simply add `===` as Rparagraph
*)
{ blocks; next = Rparagraph [ StrSlice.to_string s ] }
else { blocks = Heading ([], level, text) :: blocks; next = Rempty }
else
{ blocks = Heading ([], Lsetext len, level, text) :: blocks
; next = Rempty
}
| Rparagraph lines, _ ->
{ blocks; next = Rparagraph (StrSlice.to_string s :: lines) }
| Rfenced_code (_, num, q, _, _, _), Lfenced_code (_, num', q1, ("", _), _)
Expand Down
4 changes: 2 additions & 2 deletions src/block_parser.mli
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
open Ast.Impl
module Raw = Ast_block.Raw
open Cst.Impl
module Raw = Cst_block.Raw

module Pre : sig
val of_channel :
Expand Down
20 changes: 20 additions & 0 deletions src/cst.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module Impl = struct
include Cst_inline
include Cst_block.List_types
include Cst_block.Table_alignments
include Cst_block.WithInline

type attributes = (string * string) list
type parse_tree = attributes block list
end

module type Intf = module type of Impl

module Util = struct
include Impl

let same_block_list_kind k1 k2 =
match (k1, k2) with
| Ordered (_, c1), Ordered (_, c2) | Bullet c1, Bullet c2 -> c1 = c2
| _ -> false
end
91 changes: 91 additions & 0 deletions src/cst_block.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
module type BlockContent = sig
type 'a t
end

module StringContent = struct
type 'attr t = string
end

module InlineContent = struct
type 'attr t = 'attr Cst_inline.inline
end

module List_types = struct
type list_type =
| Ordered of int * char
| Bullet of char

type list_spacing =
| Loose
| Tight
end

module Table_alignments = struct
type cell_alignment =
| Default
| Left
| Centre
| Right
end

open List_types
open Table_alignments

type heading_type =
| Latx
| Lsetext of int

module Make (C : BlockContent) = struct
type 'attr def_elt =
{ term : 'attr C.t
; defs : 'attr C.t list
}

(* A value of type 'attr is present in all variants of this type. We use it to associate
extra information to each node in the AST. Cn the common case, the attributes type defined
above is used. We might eventually have an alternative function to parse blocks while keeping
concrete information sucpasyh as source location and we'll use it for that as well. *)
type 'attr block =
| Paragraph of 'attr * 'attr C.t
| List of 'attr * list_type * list_spacing * 'attr block list list
| Blockquote of 'attr * 'attr block list
| Thematic_break of 'attr
| Heading of 'attr * heading_type * int * 'attr C.t
| Code_block of 'attr * string * string
| Html_block of 'attr * string
| Definition_list of 'attr * 'attr def_elt list
| Table of 'attr * ('attr C.t * cell_alignment) list * 'attr C.t list list
(** A table is represented by a header row, which is a list of pairs of
header cells and alignments, and a list of rows *)
end

module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct
module SrcBlock = Make (Src)
module DstBlock = Make (Dst)

let rec map (f : 'attr Src.t -> 'attr Dst.t) :
'attr SrcBlock.block -> 'attr DstBlock.block = function
| SrcBlock.Paragraph (attr, x) -> DstBlock.Paragraph (attr, f x)
| List (attr, ty, sp, bl) ->
List (attr, ty, sp, List.map (List.map (map f)) bl)
| Blockquote (attr, xs) -> Blockquote (attr, List.map (map f) xs)
| Thematic_break attr -> Thematic_break attr
| Heading (attr, heading_type, level, text) ->
Heading (attr, heading_type, level, f text)
| Definition_list (attr, l) ->
let f { SrcBlock.term; defs } =
{ DstBlock.term = f term; defs = List.map f defs }
in
Definition_list (attr, List.map f l)
| Code_block (attr, label, code) -> Code_block (attr, label, code)
| Html_block (attr, x) -> Html_block (attr, x)
| Table (attr, headers, rows) ->
Table
( attr
, List.map (fun (header, alignment) -> (f header, alignment)) headers
, List.map (List.map f) rows )
end

module Mapper = MakeMapper (StringContent) (InlineContent)
module Raw = Make (StringContent)
module WithInline = Make (InlineContent)
33 changes: 33 additions & 0 deletions src/cst_inline.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
(* TODO The presence of `attrs` in several of these nodes is leaking an
implementation detail: we have no support for attributes in `Concat`
`Soft_break` or `Html` nodes. The attributes are just dropped during
rendering. Should we remove this from the UI, or should we include
those somehow? Or should we include these in the document model, but
but with the caveat that most renderings of the document don't support
attributes in these nodes? *)

type emph_style =
| Star
| Underscore

type link_type =
| Regular
| Autolink

type 'attr inline =
| Concat of 'attr * 'attr inline list
| Text of 'attr * string
| Emph of 'attr * emph_style * 'attr inline
| Strong of 'attr * emph_style * 'attr inline
| Code of 'attr * string
| Hard_break of 'attr
| Soft_break of 'attr
| Link of 'attr * link_type * 'attr link
| Image of 'attr * 'attr link
| Html of 'attr * string

and 'attr link =
{ label : 'attr inline
; destination : string
; title : string option
}
30 changes: 25 additions & 5 deletions src/omd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,40 @@ let toc = Toc.toc

let parse_inline defs s = Parser.inline defs (Parser.P.of_string s)

let parse_inlines (md, defs) : doc =
let parse_inlines (md, defs) : Cst.Impl.parse_tree =
let defs =
let f (def : attributes Parser.link_def) =
let f (def : Cst.Impl.attributes Parser.link_def) =
{ def with label = Parser.normalize def.label }
in
List.map f defs
in
List.map (Ast_block.Mapper.map (parse_inline defs)) md
List.map (Cst_block.Mapper.map (parse_inline defs)) md

let escape_html_entities = Html.htmlentities
let of_channel ic : doc = parse_inlines (Block_parser.Pre.of_channel ic)
let of_string s = parse_inlines (Block_parser.Pre.of_string s)

module Parse_tree = struct
let of_channel ic : Cst.Impl.parse_tree =
parse_inlines (Block_parser.Pre.of_channel ic)

let of_string s : Cst.Impl.parse_tree =
parse_inlines (Block_parser.Pre.of_string s)
end

let of_channel ic : Ast.Impl.doc =
let cst : Cst.Impl.attributes Cst.Impl.block list =
Parse_tree.of_channel ic
in
let ast = List.map Ast.Impl.of_cst_block cst in
ast

let of_string s : Ast.Impl.doc =
let cst : Cst.Impl.attributes Cst.Impl.block list = Parse_tree.of_string s in
let ast = List.map Ast.Impl.of_cst_block cst in
ast

let to_html ?auto_identifiers doc =
Html.to_string (Html.of_doc ?auto_identifiers doc)

let to_sexp ast = Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast)

module Print = Print
9 changes: 9 additions & 0 deletions src/omd.mli
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,18 @@ val escape_html_entities : string -> string
['&'] into ["&amp;"], ['<'] in ["&lt;"] and ['>'] into ["&gt;"]
*)

module Parse_tree : sig
val of_channel : in_channel -> Cst.Impl.parse_tree
val of_string : string -> Cst.Impl.parse_tree
end

(** {2 Converting to and from documents} *)

val of_channel : in_channel -> doc
val of_string : string -> doc
val to_html : ?auto_identifiers:bool -> doc -> string
val to_sexp : doc -> string

module Print : sig
val pp : Format.formatter -> Cst.Impl.parse_tree -> unit
end
Loading