Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update spec to 0.30 #266

Merged
merged 8 commits into from
May 24, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ formats. In addition to the library, a command-line tool `omd` is included to
easily convert markdown into HTML.

Omd aims at implementing the [Commonmark](https://commonmark.org/) standard. The
version currently targeted is [0.29](https://spec.commonmark.org/0.29/).
version currently targeted is [0.30](https://spec.commonmark.org/0.30/).

Omd is developed on GitHub. If you need to report an issue, please do so at
https://github.com/ocaml/omd/issues.
Expand Down
98 changes: 19 additions & 79 deletions src/parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ open Compat
module Sub : sig
type t

val of_string : string -> t
val of_string : ?off:int -> string -> t
val to_string : t -> string
val offset : int -> t -> t
val lexbuf : t -> Lexing.lexbuf
Expand All @@ -17,6 +17,7 @@ module Sub : sig
val for_all : (char -> bool) -> t -> bool
val exists : (char -> bool) -> t -> bool
val is_empty : t -> bool
val get_offset : t -> int
val length : t -> int
val sub : len:int -> t -> t
end = struct
Expand All @@ -26,9 +27,10 @@ end = struct
; len : int
}

let of_string base = { base; off = 0; len = String.length base }
let of_string ?(off = 0) base = { base; off; len = String.length base - off }
let to_string { base; off; len } = String.sub base off len
let print ppf s = Format.fprintf ppf "%S" (to_string s)
let get_offset { off; _ } = off
let length { len; _ } = len

let offset n { base; off; len } =
Expand Down Expand Up @@ -80,10 +82,9 @@ end = struct
in
loop n s

let tails n s =
let tails n { base; off; len } =
if n < 0 then invalid_arg "tails";
let rec loop n s = if n = 0 then s else loop (pred n) (tail s) in
loop n s
{ base; off = off + n; len = len - n }

let is_empty s = length s = 0

Expand Down Expand Up @@ -127,6 +128,7 @@ module P : sig
val peek_before : char -> state -> char
val peek_after : char -> state -> char
val pair : 'a t -> 'b t -> ('a * 'b) t
val on_sub : (Sub.t -> 'a * Sub.t) -> 'a t
end = struct
type state =
{ str : string
Expand Down Expand Up @@ -214,6 +216,11 @@ end = struct
let x = p st in
let y = q st in
(x, y)

let on_sub fn st =
let result, s = fn (Sub.of_string ~off:st.pos st.str) in
st.pos <- Sub.get_offset s;
result
end

type html_kind =
Expand Down Expand Up @@ -399,7 +406,7 @@ let entity s =
match Sub.heads 2 s with
| '#' :: ('x' | 'X') :: _ ->
let rec loop m n s =
if m > 8 then raise Fail;
if m > 6 then raise Fail;
match Sub.head s with
| Some ('a' .. 'f' as c) ->
loop
Expand All @@ -425,7 +432,7 @@ let entity s =
loop 0 0 (Sub.tails 2 s)
| '#' :: _ ->
let rec loop m n s =
if m > 8 then raise Fail;
if m > 7 then raise Fail;
match Sub.head s with
| Some ('0' .. '9' as c) ->
loop (succ m) ((n * 10) + Char.code c - Char.code '0') (Sub.tail s)
Expand Down Expand Up @@ -622,7 +629,7 @@ let known_tags =
; "ul"
]

let special_tags = [ "script"; "pre"; "style" ]
let special_tags = [ "pre"; "script"; "style"; "textarea" ]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Appreciate the alphabetizing.


let known_tag s =
let s = String.lowercase_ascii s in
Expand Down Expand Up @@ -834,77 +841,10 @@ let inline_attribute_string s =
attr

let entity buf st =
let p = pos st in
if next st <> '&' then raise Fail;
match peek st with
| Some '#' -> (
junk st;
match peek st with
| Some ('x' | 'X') ->
junk st;
let rec aux n m =
if n > 8 then Buffer.add_string buf (range st p (pos st - p))
else
match peek st with
| Some ('0' .. '9' as c) ->
junk st;
aux (succ n) ((m * 16) + Char.code c - Char.code '0')
| Some ('a' .. 'f' as c) ->
junk st;
aux (succ n) ((m * 16) + Char.code c - Char.code 'a' + 10)
| Some ('A' .. 'F' as c) ->
junk st;
aux (succ n) ((m * 16) + Char.code c - Char.code 'A' + 10)
| Some ';' ->
junk st;
if n = 0 then Buffer.add_string buf (range st p (pos st - p))
else
let u =
if Uchar.is_valid m && m <> 0 then Uchar.of_int m
else Uchar.rep
in
Buffer.add_utf_8_uchar buf u
| Some _ | None -> Buffer.add_string buf (range st p (pos st - p))
in
aux 0 0
| Some '0' .. '9' ->
let rec aux n m =
if n > 8 then Buffer.add_string buf (range st p (pos st - p))
else
match peek st with
| Some ('0' .. '9' as c) ->
junk st;
aux (succ n) ((m * 10) + Char.code c - Char.code '0')
| Some ';' ->
junk st;
if n = 0 then Buffer.add_string buf (range st p (pos st - p))
else
let u =
if Uchar.is_valid m && m <> 0 then Uchar.of_int m
else Uchar.rep
in
Buffer.add_utf_8_uchar buf u
| Some _ | None -> Buffer.add_string buf (range st p (pos st - p))
in
aux 0 0
| Some _ | None -> Buffer.add_string buf (range st p (pos st - p)))
| Some ('0' .. '9' | 'a' .. 'z' | 'A' .. 'Z') ->
let q = pos st in
let rec aux () =
match peek st with
| Some ('0' .. '9' | 'a' .. 'z' | 'A' .. 'Z') ->
junk st;
aux ()
| Some ';' -> (
let name = range st q (pos st - q) in
junk st;
match Entities.f name with
| [] -> Buffer.add_string buf (range st p (pos st - p))
| _ :: _ as cps -> List.iter (Buffer.add_utf_8_uchar buf) cps)
| Some _ | None -> Buffer.add_string buf (range st p (pos st - p))
in
aux ()
| Some _ | None -> Buffer.add_string buf (range st p (pos st - p))
junk st;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a wonderful cleanup! 😍

match on_sub entity st with
| cs -> List.iter (Buffer.add_utf_8_uchar buf) cs
| exception Fail -> Buffer.add_char buf '&'

module Pre = struct
type delim =
Expand Down
34 changes: 29 additions & 5 deletions tests/dune.inc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/extract_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ let protect ~finally f =
r

let disabled =
[ 175; 184; 185; 410; 411; 414; 415; 416; 428; 468; 469; 516; 536 ]
[ 206; 215; 216; 410; 411; 414; 415; 416; 428; 468; 469; 519; 539 ]

let with_open_in fn f =
let ic = open_in fn in
Expand Down
Loading