diff --git a/md2mld.opam b/md2mld.opam index 603f303..2b8ac39 100644 --- a/md2mld.opam +++ b/md2mld.opam @@ -11,7 +11,7 @@ depends: [ "ocaml" "dune" {>= "2.0"} "base-bytes" - "omd" + "omd" {= "2.0~alpha"} ] build: ["dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test}] dev-repo: "git+https://github.com/mseri/md2mld.git" diff --git a/src/backend.ml b/src/backend.ml index 796f844..3f7b10b 100644 --- a/src/backend.ml +++ b/src/backend.ml @@ -5,41 +5,201 @@ (* http://www.isc.org/downloads/software-support-policy/isc-license/ *) (***********************************************************************) -open Omd_representation -open Omd_utils +(* From https://github.com/ocaml/omd/pull/215 *) -(* the follwing to functions come from - * https://github.com/ocaml/omd/blob/master/src/omd_backend.ml#L632 *) -let string_of_attrs attrs = - let b = Buffer.create 1024 in - List.iter +type t = + | TripleSurround of string * t * string * t * string + | Surround of string * t * string + | BlockSurround of string * t * string + | GeneralBlock of t + | Text of string + | Raw of string + | Null + | Concat of t * t + +let nl = Raw "\n" + +let escape_uri s = + let b = Buffer.create (String.length s) in + String.iter (function - | a, Some v -> - if not (String.contains v '\'') - then Printf.bprintf b " %s='%s'" a v - else if not (String.contains v '"') - then Printf.bprintf b " %s=\"%s\"" a v - else Printf.bprintf b " %s=\"%s\"" a v - | a, None -> - (* if html4 then *) - (* Printf.bprintf b " %s='%s'" a a *) - (* else *) - Printf.bprintf b " %s=''" a - (* HTML5 *)) - attrs; + | ( '!' + | '*' + | '\'' + | '(' + | ')' + | ';' + | ':' + | '@' + | '=' + | '+' + | '$' + | ',' + | '/' + | '?' + | '%' + | '#' + | 'A' .. 'Z' + | 'a' .. 'z' + | '0' .. '9' + | '-' | '_' | '.' | '~' ) as c -> Buffer.add_char b c + | '&' -> Buffer.add_string b "&" + | _ as c -> Printf.bprintf b "%%%2X" (Char.code c)) + s; Buffer.contents b -let filter_text_omd_rev l = - let rec loop b r = function - | [] -> if b then r else l - | ("media:type", Some "text/omd") :: tl -> loop true r tl - | e :: tl -> loop b (e :: r) tl +let to_plain_text t = + let buf = Buffer.create 1024 in + let rec go = function + | Text t -> Buffer.add_string buf t + | Concat (t1, t2) -> + go t1; + go t2 + | _ -> () in - loop false [] l + go t; + Buffer.contents buf + + +let rec add_to_buffer buf = function + | TripleSurround (s1, t1, s2, t2, s3) -> + Printf.bprintf buf "%s%a%s%a%s" s1 add_to_buffer t1 s2 add_to_buffer t2 s3 + | Surround (s1, t1, s2) -> Printf.bprintf buf "%s%a%s" s1 add_to_buffer t1 s2 + | BlockSurround (s1, t1, s2) -> Printf.bprintf buf "\n\b%s%a%s\n" s1 add_to_buffer t1 s2 + | GeneralBlock t -> Printf.bprintf buf "\n%a\n" add_to_buffer t + | Text s -> Buffer.add_string buf s + | Raw s -> Buffer.add_string buf s + | Null -> () + | Concat (t1, t2) -> + add_to_buffer buf t1; + add_to_buffer buf t2 + + +let text s = Text s +let raw s = Raw s + +let concat s1 s2 = + match s1, s2 with + | Null, s | s, Null -> s + | s1, s2 -> Concat (s1, s2) -let rec mld_of_md ~min_header md = +let concat_map f l = List.fold_left (fun accu x -> concat accu (f x)) Null l + +let cross_reference_words = + [ "module" + ; "modtype" + ; "class" + ; "classtype" + ; "val" + ; "type" + ; "exception" + ; "attribute" + ; "method" + ; "section" + ; "const" + ; "recfield" + ] + + +let is_cross_reference_regexps = + cross_reference_words |> List.map (fun w -> Str.regexp_string (w ^ ":")) + + +let inferred_cross_reference = Str.regexp_string "ref:" + +let rec inline ({ il_desc; il_attributes = _ } : Omd.inline) = + match il_desc with + | Concat l -> concat_map inline l + | Text s -> text s + | Emph il -> Surround ("{e ", inline il, "}") + | Strong il -> Surround ("{b ", inline il, "}") + | Code s -> Surround ("[", text s, "]") + | Hard_break -> text "\n\n" + | Soft_break -> text "\n" + | Html body -> Surround ("{%html: ", text body, "%}") + | Link { label; destination; title = _ } -> + let cross_reference = + match label with + | { il_desc = Text s; _ } when s == destination -> + if Str.string_match inferred_cross_reference destination 0 + then Some (Str.string_after destination 4) + else if List.exists + (fun r -> Str.string_match r destination 0) + is_cross_reference_regexps + then Some destination + else None + | _ -> None + in + (match cross_reference with + | Some cross_reference -> Surround ("{!", text cross_reference, "}") + | None -> TripleSurround ("{{: ", text destination, "} ", inline label, "}")) + | Image { label; destination; title } -> + let img = + "\"" "" + | Some title -> " title=\"" ^ title ^ "\"") + ^ "/>" + in + Surround ("{%html: ", text img, "%}") + + +let rec block min_head_lvl ({ bl_desc; bl_attributes = _attr } : Omd.block) = + match bl_desc with + | Blockquote q -> BlockSurround ("{v ", concat_map (block min_head_lvl) q, "v}") + | Paragraph md -> GeneralBlock (inline md) + | List (ty, sp, bl) -> + let sign = + match ty with + | Ordered _ -> "+ " + | Bullet _ -> "- " + in + let li t = + let block' (t : Omd.block) = + match t.bl_desc, sp with + | Paragraph t, Tight -> concat (inline t) nl + | _ -> block min_head_lvl t + in + let nl = if sp = Tight then Null else nl in + Surround (sign, concat nl (concat_map block' t), "") + in + concat nl (concat_map li bl) + | Code_block (_label, code) -> BlockSurround ("{[\n", text code, "]}") + | Thematic_break -> GeneralBlock (text "***") + | Html_block body -> raw body + | Heading (level, text) -> + BlockSurround + ( "{" + ^ (match level + min_head_lvl with + | 1 -> "0" + | 2 -> "1" + | 3 -> "2" + | 4 -> "3" + | 5 -> "4" + | _ -> "5") + ^ " " + , inline text + , "}" ) + | Definition_list _ -> Null + + +let of_doc ?(min_head_lvl = 0) doc = concat_map (block min_head_lvl) doc + +let to_string t = + let buf = Buffer.create 1024 in + add_to_buffer buf t; + Buffer.contents buf + + +let mld_of_md ~min_head_lvl md = to_string @@ of_doc ~min_head_lvl md + +(* let rec mld_of_md ~min_header md = let quote ?(indent = 0) s = let b = Buffer.create (String.length s) in let l = String.length s in @@ -71,7 +231,7 @@ let rec mld_of_md ~min_header md = in let init_header lvl = Printf.sprintf "{%d " (min_header + lvl) in let references = ref None in - let rec loop ?(fst_p_in_li = true) ?(is_in_list = false) list_indent l = + let rec loop ?(fst_p_in_li = true) ?(is_in_list = false) list_indent (l : Omd.doc) = (* [list_indent: int] is the indentation level in number of spaces. *) (* [is_in_list: bool] is necessary to know if we are inside a paragraph which is inside a list item because those need to be indented! *) @@ -79,33 +239,25 @@ let rec mld_of_md ~min_header md = loop ~fst_p_in_li ~is_in_list list_indent l in match l with - | X x :: tl -> - (match x#to_t md with - | Some t -> loop list_indent t - | None -> - (match x#to_html ~indent:0 Omd_backend.html_of_md md with - | Some s -> Printf.bprintf b "{%%html: %s %%}\n" s - | None -> ())); - loop list_indent tl - | Blockquote q :: tl -> + | { bl_desc = Blockquote q; _ } :: tl -> Buffer.add_string b (quote ~indent:list_indent (mld_of_md ~min_header q)); if tl <> [] then Buffer.add_string b "\n"; loop list_indent tl (* TODO: we need to accumulate the references separately *) - | Ref (rc, _name, _text, fallback) :: tl -> + | { bl_desc = Ref (rc, _name, _text, fallback); _ } :: tl -> if !references = None then references := Some rc; loop list_indent (Raw fallback#to_string :: tl) - | Img_ref (rc, _name, _alt, fallback) :: tl -> + | { bl_desc = Img_ref (rc, _name, _alt, fallback); _ } :: tl -> if !references = None then references := Some rc; loop list_indent (Raw fallback#to_string :: tl) - | Paragraph [] :: tl -> loop list_indent tl - | Paragraph md :: tl -> + | { bl_desc = Paragraph []; _ } :: tl -> loop list_indent tl + | { bl_desc = Paragraph md; _ } :: tl -> if is_in_list then if fst_p_in_li then add_spaces (list_indent - 2) else add_spaces list_indent; loop ~fst_p_in_li:false list_indent md; Printf.bprintf b "\n\n"; loop ~fst_p_in_li:false list_indent tl - | Img (alt, src, title) :: tl -> + | { bl_desc = Img (alt, src, title); _ } :: tl -> Printf.bprintf b "{%%html: %s%%}" @@ -113,20 +265,20 @@ let rec mld_of_md ~min_header md = (if alt = "" then "" else Printf.sprintf " alt=\"%s\"" alt) title; loop list_indent tl - | Text t :: tl -> + | { bl_desc = Text t; _ } :: tl -> Printf.bprintf b "%s" (Omd_backend.escape_markdown_characters t); loop list_indent tl - | Emph md :: tl -> + | { bl_desc = Emph md; _ } :: tl -> Buffer.add_string b "{e "; loop list_indent md; Buffer.add_string b "}"; loop list_indent tl - | Bold md :: tl -> + | { bl_desc = Bold md; _ } :: tl -> Buffer.add_string b "{b "; loop list_indent md; Buffer.add_string b "}"; loop list_indent tl - | Ol l :: tl | Olp l :: tl -> + | { bl_desc = Ol l; _ } :: tl | { bl_desc = Olp l; _ } :: tl -> if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' then Buffer.add_char b '\n'; add_spaces list_indent; @@ -142,7 +294,7 @@ let rec mld_of_md ~min_header md = Buffer.add_string b "}"; if list_indent = 0 then Buffer.add_char b '\n'; loop list_indent tl - | Ul l :: tl | Ulp l :: tl -> + | { bl_desc = Ul l; _ } :: tl | { bl_desc = Ulp l; _ } :: tl -> if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' then Buffer.add_char b '\n'; add_spaces list_indent; @@ -158,39 +310,40 @@ let rec mld_of_md ~min_header md = Buffer.add_string b "}"; if list_indent = 0 then Buffer.add_char b '\n'; loop list_indent tl - | Code (_lang, c) :: tl -> + | { bl_desc = Code (_lang, c); _ } :: tl -> Buffer.add_char b '['; Printf.bprintf b "%s" c; Buffer.add_char b ']'; loop list_indent tl - | Code_block (_lang, c) :: tl -> + | { bl_desc = Code_block (_lang, c); _ } :: tl -> Buffer.add_string b "{[\n"; Buffer.add_string b c; if not (Buffer.nth b (Buffer.length b - 1) = '\n') then Buffer.add_char b '\n'; Buffer.add_string b "]}\n"; loop list_indent tl - | Br :: tl -> + | { bl_desc = Br; _ } :: tl -> Buffer.add_string b "\n\n"; loop list_indent tl - | Hr :: tl -> + | { bl_desc = Hr; _ } :: tl -> Buffer.add_string b "{%html:
%}\n"; loop list_indent tl - | Raw s :: tl -> + | { bl_desc = Raw s; _ } :: tl -> Buffer.add_string b s; loop list_indent tl - | Raw_block s :: tl -> + | { bl_desc = Raw_block s; _ } :: tl -> Buffer.add_char b '\n'; Buffer.add_string b s; Buffer.add_char b '\n'; loop list_indent tl - | Html (tagname, attrs, []) :: tl when StringSet.mem tagname html_void_elements -> + | { bl_desc = Html (tagname, attrs, []); _ } :: tl + when StringSet.mem tagname html_void_elements -> Buffer.add_string b "{%html: "; Printf.bprintf b "<%s" tagname; Buffer.add_string b (string_of_attrs attrs); Buffer.add_string b " />"; Buffer.add_string b "}\n"; loop list_indent tl - | Html (tagname, attrs, body) :: tl -> + | { bl_desc = Html (tagname, attrs, body); _ } :: tl -> Buffer.add_string b "{%html: "; let a = filter_text_omd_rev attrs in Printf.bprintf b "<%s" tagname; @@ -202,10 +355,11 @@ let rec mld_of_md ~min_header md = Printf.bprintf b "" tagname; Buffer.add_string b "}\n"; loop list_indent tl - | Html_block (tagname, attrs, body) :: tl -> + | { bl_desc = Html_block (tagname, attrs, body); _ } :: tl -> let needs_newlines = match tl with - | NL :: Paragraph p :: _ | Paragraph p :: _ -> p <> [] + | { bl_desc = NL; _ } :: Paragraph p :: _ | { bl_desc = Paragraph p; _ } :: _ -> + p <> [] | ( H1 _ | H2 _ | H3 _ | H4 _ | H5 _ | H6 _ | Ul _ | Ol _ | Ulp _ | Olp _ | Code (_, _) | Code_block (_, _) @@ -217,9 +371,10 @@ let rec mld_of_md ~min_header md = | Blockquote _ | Img (_, _, _) ) :: _ -> true - | (Html_block (_, _, _) | Html_comment _ | Raw _ | Raw_block _) :: _ -> false - | X _ :: _ -> false - | NL :: _ -> false + | { bl_desc = Html_block (_, _, _) | Html_comment _ | Raw _ | Raw_block _; _ } + :: _ -> false + | { bl_desc = X _; _ } :: _ -> false + | { bl_desc = NL; _ } :: _ -> false | [] -> false in Buffer.add_string b "{%html: "; @@ -243,43 +398,43 @@ let rec mld_of_md ~min_header md = Buffer.add_string b " %}"; if needs_newlines then Buffer.add_string b "\n\n"; loop list_indent tl) - | Html_comment s :: tl -> + | { bl_desc = Html_comment s; _ } :: tl -> Printf.bprintf b "{%%html: %s %%}" s; loop list_indent tl - | Url (href, s, _title) :: tl -> + | { bl_desc = Url (href, s, _title); _ } :: tl -> Printf.bprintf b "{{: %s} %s}" href (mld_of_md ~min_header s); loop list_indent tl - | H1 md :: tl -> + | { bl_desc = H1 md; _ } :: tl -> Buffer.add_string b (init_header 0); loop list_indent md; Buffer.add_string b "}\n"; loop list_indent tl - | H2 md :: tl -> + | { bl_desc = H2 md; _ } :: tl -> Buffer.add_string b (init_header 1); loop list_indent md; Buffer.add_string b "}\n"; loop list_indent tl - | H3 md :: tl -> + | { bl_desc = H3 md; _ } :: tl -> Buffer.add_string b (init_header 2); loop list_indent md; Buffer.add_string b "}\n"; loop list_indent tl - | H4 md :: tl -> + | { bl_desc = H4 md; _ } :: tl -> Buffer.add_string b (init_header 3); loop list_indent md; Buffer.add_string b "}\n"; loop list_indent tl - | H5 md :: tl -> + | { bl_desc = H5 md; _ } :: tl -> Buffer.add_string b (init_header 4); loop list_indent md; Buffer.add_string b "}\n"; loop list_indent tl - | H6 md :: tl -> + | { bl_desc = H6 md; _ } :: tl -> Buffer.add_string b (init_header 5); loop list_indent md; Buffer.add_string b "}\n"; loop list_indent tl - | NL :: tl -> + | { bl_desc = NL; _ } :: tl -> if Buffer.length b = 1 || (Buffer.length b > 1 && not @@ -299,4 +454,4 @@ let rec mld_of_md ~min_header md = Printf.bprintf b "%s: {{: %s} %s}\n" name url (if title = "" then url else title)) r#get_all); let res = Buffer.contents b in - res + res *) diff --git a/src/backend.mli b/src/backend.mli index 051357d..4276876 100644 --- a/src/backend.mli +++ b/src/backend.mli @@ -5,9 +5,9 @@ (* http://www.isc.org/downloads/software-support-policy/isc-license/ *) (***********************************************************************) -(** [mld_of_md ~min_header md] returns a string containing the mld version of +(** [mld_of_md ~min_head_lvl md] returns a string containing the mld version of [md]. Note that [md] uses the {!Omd} internal representation of - Markdown. The ~min_header parameters allows you to choose the value + Markdown. The ~min_head_lvl parameters allows you to choose the value for the section heading associated to

, all the other values will be determined incrementally. *) -val mld_of_md : min_header:int -> Omd_representation.t -> string +val mld_of_md : min_head_lvl:int -> Omd.doc -> string diff --git a/src/dune b/src/dune index f13a2ab..86eb535 100644 --- a/src/dune +++ b/src/dune @@ -1,4 +1,4 @@ (executable (name md2mld) (public_name md2mld) - (libraries omd)) + (libraries omd str)) diff --git a/src/md2mld.ml b/src/md2mld.ml index 9141a19..2932b81 100644 --- a/src/md2mld.ml +++ b/src/md2mld.ml @@ -52,9 +52,9 @@ let _ = Printf.eprintf "error: file %s not found\n%!" filename; exit 2 | [ filename ] -> - let min_header = !min_header in + let min_head_lvl = !min_header in readall filename |> Omd.of_string - |> Backend.mld_of_md ~min_header + |> Backend.mld_of_md ~min_head_lvl |> String.trim |> print_endline diff --git a/tests/dune b/tests/dune index 5fa63f5..3f67a19 100644 --- a/tests/dune +++ b/tests/dune @@ -14,6 +14,14 @@ %{targets} (run %{exe:../src/md2mld.exe} -min-header 3 %{deps})))) +(rule + (targets test.output) + (deps test.md) + (action + (with-stdout-to + %{targets} + (run %{exe:../src/md2mld.exe} %{deps})))) + (rule (alias runtest) (action @@ -23,3 +31,8 @@ (alias runtest) (action (diff readme-min-header-3.expected readme-min-header-3.output))) + +(rule + (alias runtest) + (action + (diff test.expected test.output))) diff --git a/tests/readme-min-header-3.expected b/tests/readme-min-header-3.expected index 1d93887..8a7fb8c 100644 --- a/tests/readme-min-header-3.expected +++ b/tests/readme-min-header-3.expected @@ -1,22 +1,22 @@ -{3 Convert md files into odoc mld files} +{3 Convert md files into odoc mld files} [md2mld] converts a Markdown-format file into the [mld] format used by {{: https://github.com/ocaml/odoc} odoc} to render HTML documentation or OCaml libraries. You can use this script to automatically embed a [README.md] file into API documentation for an OCaml library. You can use it manually as follows -{[ +{[ $ md2mld filename.md > outfile.mld ]} In [dune] you can use it to generate an mld file with -{[ +{[ (rule (with-stdout-to outfile.mld (run md2mld filename.md))) ]} You can see the documentation generated from the latest tagged version of this README at {{: http://mseri.github.io/md2mld/md2mld/index.html} mseri.github.io/md2mld/md2mld/index.html}. -{3 Known issues} +{3 Known issues} Until the new odoc {{: https://github.com/ocaml/odoc/issues/141} fixing #141} is released, the minimal header allowed in the [md] file will be the level 3 one [###]. You can work around this by using the [-min-header 3] flag during the invocation of [md2mld]. diff --git a/tests/readme.expected b/tests/readme.expected index f1cde13..1dc0a0c 100644 --- a/tests/readme.expected +++ b/tests/readme.expected @@ -1,22 +1,22 @@ -{0 Convert md files into odoc mld files} +{0 Convert md files into odoc mld files} [md2mld] converts a Markdown-format file into the [mld] format used by {{: https://github.com/ocaml/odoc} odoc} to render HTML documentation or OCaml libraries. You can use this script to automatically embed a [README.md] file into API documentation for an OCaml library. You can use it manually as follows -{[ +{[ $ md2mld filename.md > outfile.mld ]} In [dune] you can use it to generate an mld file with -{[ +{[ (rule (with-stdout-to outfile.mld (run md2mld filename.md))) ]} You can see the documentation generated from the latest tagged version of this README at {{: http://mseri.github.io/md2mld/md2mld/index.html} mseri.github.io/md2mld/md2mld/index.html}. -{0 Known issues} +{0 Known issues} Until the new odoc {{: https://github.com/ocaml/odoc/issues/141} fixing #141} is released, the minimal header allowed in the [md] file will be the level 3 one [###]. You can work around this by using the [-min-header 3] flag during the invocation of [md2mld]. diff --git a/tests/test.expected b/tests/test.expected new file mode 100644 index 0000000..e7e9c05 --- /dev/null +++ b/tests/test.expected @@ -0,0 +1,43 @@ +{0 this is a markdown test document} + +This is {e important} or {b very important}, let's try the [code] fences. + +{[ +let x = "bla" +]} + +Hello this is a ordered list + ++ One ++ Two ++ Three + +And this is a unordered list + +- One +- Two +- Three + +{1 Another header} + +Blah blah blah + +{v +This is some quote + +{v +This is a nested quote +v} +v} + +And this is a link {{: https://ocaml.org} Hello}. + +{[ +another code block? +]} + +{{: https://ocaml.org} https://ocaml.org} <-- A link + +{!module:Module} <-- An explicit cross reference + +{!Module} <-- An inferred cross reference diff --git a/tests/test.md b/tests/test.md new file mode 100644 index 0000000..7bc9a50 --- /dev/null +++ b/tests/test.md @@ -0,0 +1,36 @@ +# this is a markdown test document + +This is _important_ or **very important**, let's try the `code` fences. + +``` +let x = "bla" +``` + +Hello this is a ordered list + +1. One +2. Two +3. Three + +And this is a unordered list + +- One +- Two +- Three + +## Another header + +Blah blah blah + +> This is some quote +>> This is a nested quote + +And this is a link [Hello](https://ocaml.org). + + another code block? + + <-- A link + + <-- An explicit cross reference + + <-- An inferred cross reference \ No newline at end of file