diff --git a/md2mld.opam b/md2mld.opam index 603f303..2b8ac39 100644 --- a/md2mld.opam +++ b/md2mld.opam @@ -11,7 +11,7 @@ depends: [ "ocaml" "dune" {>= "2.0"} "base-bytes" - "omd" + "omd" {= "2.0~alpha"} ] build: ["dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test}] dev-repo: "git+https://github.com/mseri/md2mld.git" diff --git a/src/backend.ml b/src/backend.ml index 796f844..3f7b10b 100644 --- a/src/backend.ml +++ b/src/backend.ml @@ -5,41 +5,201 @@ (* http://www.isc.org/downloads/software-support-policy/isc-license/ *) (***********************************************************************) -open Omd_representation -open Omd_utils +(* From https://github.com/ocaml/omd/pull/215 *) -(* the follwing to functions come from - * https://github.com/ocaml/omd/blob/master/src/omd_backend.ml#L632 *) -let string_of_attrs attrs = - let b = Buffer.create 1024 in - List.iter +type t = + | TripleSurround of string * t * string * t * string + | Surround of string * t * string + | BlockSurround of string * t * string + | GeneralBlock of t + | Text of string + | Raw of string + | Null + | Concat of t * t + +let nl = Raw "\n" + +let escape_uri s = + let b = Buffer.create (String.length s) in + String.iter (function - | a, Some v -> - if not (String.contains v '\'') - then Printf.bprintf b " %s='%s'" a v - else if not (String.contains v '"') - then Printf.bprintf b " %s=\"%s\"" a v - else Printf.bprintf b " %s=\"%s\"" a v - | a, None -> - (* if html4 then *) - (* Printf.bprintf b " %s='%s'" a a *) - (* else *) - Printf.bprintf b " %s=''" a - (* HTML5 *)) - attrs; + | ( '!' + | '*' + | '\'' + | '(' + | ')' + | ';' + | ':' + | '@' + | '=' + | '+' + | '$' + | ',' + | '/' + | '?' + | '%' + | '#' + | 'A' .. 'Z' + | 'a' .. 'z' + | '0' .. '9' + | '-' | '_' | '.' | '~' ) as c -> Buffer.add_char b c + | '&' -> Buffer.add_string b "&" + | _ as c -> Printf.bprintf b "%%%2X" (Char.code c)) + s; Buffer.contents b -let filter_text_omd_rev l = - let rec loop b r = function - | [] -> if b then r else l - | ("media:type", Some "text/omd") :: tl -> loop true r tl - | e :: tl -> loop b (e :: r) tl +let to_plain_text t = + let buf = Buffer.create 1024 in + let rec go = function + | Text t -> Buffer.add_string buf t + | Concat (t1, t2) -> + go t1; + go t2 + | _ -> () in - loop false [] l + go t; + Buffer.contents buf + + +let rec add_to_buffer buf = function + | TripleSurround (s1, t1, s2, t2, s3) -> + Printf.bprintf buf "%s%a%s%a%s" s1 add_to_buffer t1 s2 add_to_buffer t2 s3 + | Surround (s1, t1, s2) -> Printf.bprintf buf "%s%a%s" s1 add_to_buffer t1 s2 + | BlockSurround (s1, t1, s2) -> Printf.bprintf buf "\n\b%s%a%s\n" s1 add_to_buffer t1 s2 + | GeneralBlock t -> Printf.bprintf buf "\n%a\n" add_to_buffer t + | Text s -> Buffer.add_string buf s + | Raw s -> Buffer.add_string buf s + | Null -> () + | Concat (t1, t2) -> + add_to_buffer buf t1; + add_to_buffer buf t2 + + +let text s = Text s +let raw s = Raw s + +let concat s1 s2 = + match s1, s2 with + | Null, s | s, Null -> s + | s1, s2 -> Concat (s1, s2) -let rec mld_of_md ~min_header md = +let concat_map f l = List.fold_left (fun accu x -> concat accu (f x)) Null l + +let cross_reference_words = + [ "module" + ; "modtype" + ; "class" + ; "classtype" + ; "val" + ; "type" + ; "exception" + ; "attribute" + ; "method" + ; "section" + ; "const" + ; "recfield" + ] + + +let is_cross_reference_regexps = + cross_reference_words |> List.map (fun w -> Str.regexp_string (w ^ ":")) + + +let inferred_cross_reference = Str.regexp_string "ref:" + +let rec inline ({ il_desc; il_attributes = _ } : Omd.inline) = + match il_desc with + | Concat l -> concat_map inline l + | Text s -> text s + | Emph il -> Surround ("{e ", inline il, "}") + | Strong il -> Surround ("{b ", inline il, "}") + | Code s -> Surround ("[", text s, "]") + | Hard_break -> text "\n\n" + | Soft_break -> text "\n" + | Html body -> Surround ("{%html: ", text body, "%}") + | Link { label; destination; title = _ } -> + let cross_reference = + match label with + | { il_desc = Text s; _ } when s == destination -> + if Str.string_match inferred_cross_reference destination 0 + then Some (Str.string_after destination 4) + else if List.exists + (fun r -> Str.string_match r destination 0) + is_cross_reference_regexps + then Some destination + else None + | _ -> None + in + (match cross_reference with + | Some cross_reference -> Surround ("{!", text cross_reference, "}") + | None -> TripleSurround ("{{: ", text destination, "} ", inline label, "}")) + | Image { label; destination; title } -> + let img = + " "" + | Some title -> " title=\"" ^ title ^ "\"") + ^ "/>" + in + Surround ("{%html: ", text img, "%}") + + +let rec block min_head_lvl ({ bl_desc; bl_attributes = _attr } : Omd.block) = + match bl_desc with + | Blockquote q -> BlockSurround ("{v ", concat_map (block min_head_lvl) q, "v}") + | Paragraph md -> GeneralBlock (inline md) + | List (ty, sp, bl) -> + let sign = + match ty with + | Ordered _ -> "+ " + | Bullet _ -> "- " + in + let li t = + let block' (t : Omd.block) = + match t.bl_desc, sp with + | Paragraph t, Tight -> concat (inline t) nl + | _ -> block min_head_lvl t + in + let nl = if sp = Tight then Null else nl in + Surround (sign, concat nl (concat_map block' t), "") + in + concat nl (concat_map li bl) + | Code_block (_label, code) -> BlockSurround ("{[\n", text code, "]}") + | Thematic_break -> GeneralBlock (text "***") + | Html_block body -> raw body + | Heading (level, text) -> + BlockSurround + ( "{" + ^ (match level + min_head_lvl with + | 1 -> "0" + | 2 -> "1" + | 3 -> "2" + | 4 -> "3" + | 5 -> "4" + | _ -> "5") + ^ " " + , inline text + , "}" ) + | Definition_list _ -> Null + + +let of_doc ?(min_head_lvl = 0) doc = concat_map (block min_head_lvl) doc + +let to_string t = + let buf = Buffer.create 1024 in + add_to_buffer buf t; + Buffer.contents buf + + +let mld_of_md ~min_head_lvl md = to_string @@ of_doc ~min_head_lvl md + +(* let rec mld_of_md ~min_header md = let quote ?(indent = 0) s = let b = Buffer.create (String.length s) in let l = String.length s in @@ -71,7 +231,7 @@ let rec mld_of_md ~min_header md = in let init_header lvl = Printf.sprintf "{%d " (min_header + lvl) in let references = ref None in - let rec loop ?(fst_p_in_li = true) ?(is_in_list = false) list_indent l = + let rec loop ?(fst_p_in_li = true) ?(is_in_list = false) list_indent (l : Omd.doc) = (* [list_indent: int] is the indentation level in number of spaces. *) (* [is_in_list: bool] is necessary to know if we are inside a paragraph which is inside a list item because those need to be indented! *) @@ -79,33 +239,25 @@ let rec mld_of_md ~min_header md = loop ~fst_p_in_li ~is_in_list list_indent l in match l with - | X x :: tl -> - (match x#to_t md with - | Some t -> loop list_indent t - | None -> - (match x#to_html ~indent:0 Omd_backend.html_of_md md with - | Some s -> Printf.bprintf b "{%%html: %s %%}\n" s - | None -> ())); - loop list_indent tl - | Blockquote q :: tl -> + | { bl_desc = Blockquote q; _ } :: tl -> Buffer.add_string b (quote ~indent:list_indent (mld_of_md ~min_header q)); if tl <> [] then Buffer.add_string b "\n"; loop list_indent tl (* TODO: we need to accumulate the references separately *) - | Ref (rc, _name, _text, fallback) :: tl -> + | { bl_desc = Ref (rc, _name, _text, fallback); _ } :: tl -> if !references = None then references := Some rc; loop list_indent (Raw fallback#to_string :: tl) - | Img_ref (rc, _name, _alt, fallback) :: tl -> + | { bl_desc = Img_ref (rc, _name, _alt, fallback); _ } :: tl -> if !references = None then references := Some rc; loop list_indent (Raw fallback#to_string :: tl) - | Paragraph [] :: tl -> loop list_indent tl - | Paragraph md :: tl -> + | { bl_desc = Paragraph []; _ } :: tl -> loop list_indent tl + | { bl_desc = Paragraph md; _ } :: tl -> if is_in_list then if fst_p_in_li then add_spaces (list_indent - 2) else add_spaces list_indent; loop ~fst_p_in_li:false list_indent md; Printf.bprintf b "\n\n"; loop ~fst_p_in_li:false list_indent tl - | Img (alt, src, title) :: tl -> + | { bl_desc = Img (alt, src, title); _ } :: tl -> Printf.bprintf b "{%%html: %s%%}" @@ -113,20 +265,20 @@ let rec mld_of_md ~min_header md = (if alt = "" then "" else Printf.sprintf " alt=\"%s\"" alt) title; loop list_indent tl - | Text t :: tl -> + | { bl_desc = Text t; _ } :: tl -> Printf.bprintf b "%s" (Omd_backend.escape_markdown_characters t); loop list_indent tl - | Emph md :: tl -> + | { bl_desc = Emph md; _ } :: tl -> Buffer.add_string b "{e "; loop list_indent md; Buffer.add_string b "}"; loop list_indent tl - | Bold md :: tl -> + | { bl_desc = Bold md; _ } :: tl -> Buffer.add_string b "{b "; loop list_indent md; Buffer.add_string b "}"; loop list_indent tl - | Ol l :: tl | Olp l :: tl -> + | { bl_desc = Ol l; _ } :: tl | { bl_desc = Olp l; _ } :: tl -> if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' then Buffer.add_char b '\n'; add_spaces list_indent; @@ -142,7 +294,7 @@ let rec mld_of_md ~min_header md = Buffer.add_string b "}"; if list_indent = 0 then Buffer.add_char b '\n'; loop list_indent tl - | Ul l :: tl | Ulp l :: tl -> + | { bl_desc = Ul l; _ } :: tl | { bl_desc = Ulp l; _ } :: tl -> if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' then Buffer.add_char b '\n'; add_spaces list_indent; @@ -158,39 +310,40 @@ let rec mld_of_md ~min_header md = Buffer.add_string b "}"; if list_indent = 0 then Buffer.add_char b '\n'; loop list_indent tl - | Code (_lang, c) :: tl -> + | { bl_desc = Code (_lang, c); _ } :: tl -> Buffer.add_char b '['; Printf.bprintf b "%s" c; Buffer.add_char b ']'; loop list_indent tl - | Code_block (_lang, c) :: tl -> + | { bl_desc = Code_block (_lang, c); _ } :: tl -> Buffer.add_string b "{[\n"; Buffer.add_string b c; if not (Buffer.nth b (Buffer.length b - 1) = '\n') then Buffer.add_char b '\n'; Buffer.add_string b "]}\n"; loop list_indent tl - | Br :: tl -> + | { bl_desc = Br; _ } :: tl -> Buffer.add_string b "\n\n"; loop list_indent tl - | Hr :: tl -> + | { bl_desc = Hr; _ } :: tl -> Buffer.add_string b "{%html: