Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Omd_tyxml #211

Closed
wants to merge 36 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
ec4a88f
Ignore _ocaml directory
shonfeder Jun 28, 2020
111577b
Add omd_tyxml package
shonfeder Jun 28, 2020
65ed352
Add tests for ocaml_tyxml
shonfeder Jun 28, 2020
ebe14dc
Expose the Html submodule
shonfeder Jun 28, 2020
adf4c2e
Add WIP tests
shonfeder Jun 28, 2020
303864b
Add initial explorations of omd_tyxml module
shonfeder Jun 28, 2020
3d077a3
Remove file picked up during merge conflict resolution
shonfeder Jun 28, 2020
e2dc9dc
Fix more merge conflict mistakes
shonfeder Jun 28, 2020
22e4a46
Stub in missing values
shonfeder Jun 28, 2020
33af31d
Use diff testing based on markdown specs
shonfeder Jul 1, 2020
408313e
Correct type alias
shonfeder Jul 1, 2020
a28ebcf
Switch back to Omd.doc -> Tyxml.doc approach
shonfeder Jul 1, 2020
379d9a3
Add uri dependency
shonfeder Jul 3, 2020
35d0cdf
Remove unneeded uri dep
shonfeder Jul 3, 2020
28011db
Expose the escape_uri function
shonfeder Jul 3, 2020
ce39cbc
Denormalize generated html in tests
shonfeder Jul 3, 2020
bb77693
Rough in inline elements
shonfeder Jul 3, 2020
3d654e5
Add more denormalizations
shonfeder Jul 5, 2020
6fbd2e9
Cover most inline elements
shonfeder Jul 5, 2020
0304ce4
Add lambdasoup as test dependency
shonfeder Jul 6, 2020
99bef0c
Restore block dropped during rebase
shonfeder Jul 6, 2020
1b2c3c9
Add Tyxml backend for all element types
shonfeder Jul 6, 2020
423fcb4
Some cleanup
shonfeder Jul 6, 2020
9476531
Remove dead code
shonfeder Jul 6, 2020
ce8ab3f
Fix type misnomers
shonfeder Jan 19, 2021
35d5d38
Fix failing tests
shonfeder Jan 22, 2021
6a7c590
Remove unsafe coerce and improve conversion
shonfeder Feb 8, 2021
94b2f94
Use cons_opt function instead of clumsy append
shonfeder Feb 8, 2021
8cf71fb
Convert > 6h into paragraphs
shonfeder Feb 8, 2021
204741d
Don't coerce tight list items
shonfeder Feb 15, 2021
eb0be13
Don't coerce in definition list translation
shonfeder Feb 15, 2021
3735ab8
Clean up comments and reorganize
shonfeder Feb 15, 2021
4266b36
Fix type in omd_tyxml description
shonfeder Feb 20, 2021
e00103e
Cleanup and document
shonfeder Feb 20, 2021
a36925c
Update TODOs
shonfeder Feb 20, 2021
f98007b
Comment tweaks
shonfeder Feb 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
_opam/
_build/
.merlin
*.install
1 change: 1 addition & 0 deletions bin/dune
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
(executable
(name main)
(public_name omd)
(package omd)
(libraries omd))
10 changes: 10 additions & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,13 @@ Additionally, OMD implements a few Github markdown features, an
extension mechanism, and some other features. Note that the opam
package installs both the OMD library and the command line tool `omd`.")
(tags (org:ocamllabs org:mirage)))

(package
(name omd-tyxml)
(synopsis "A library to convert OMD's markdown representation to Tyxml")
(description
"This optional library enables users of OMD to convert values of type Omd.doc,
representing parsed markdown, into values of type Tyxml.Html.t, which provides
statically correct represenations of HTML.")
(tags (org:ocamllabs org:mirage))
(depends omd tyxml (lambdasoup :with-test)))
37 changes: 37 additions & 0 deletions omd-tyxml.opam
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
version: "2.0.0"
synopsis: "A library to convert OMD's markdown representation to Tyxml"
description: """
This optional library enables users of OMD to convert values of type Omd.doc,
representing parsed markdown, into values of type Tyxml.Html.t, which provides
statically correct represenations of HTML."""
authors: [
"Philippe Wang <[email protected]>"
"Nicolás Ojeda Bär <[email protected]>"
]
license: "ISC"
tags: ["org:ocamllabs" "org:mirage"]
homepage: "https://github.com/ocaml/omd"
bug-reports: "https://github.com/ocaml/omd/issues"
depends: [
"dune" {>= "2.5"}
"omd"
"tyxml"
"lambdasoup" {with-test}
]
build: [
["dune" "subst"] {pinned}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/ocaml/omd.git"
3 changes: 3 additions & 0 deletions omd_tyxml/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(library
(name omd_tyxml)
(libraries omd tyxml))
177 changes: 177 additions & 0 deletions omd_tyxml/omd_tyxml.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
open Tyxml

(** TODO move into Omd if we don't replace the html module with this one *)

(** [cons_opt opt_x xs] is (x :: xs) if [opt_x] is [Some x] or else just [xs].*)
let cons_opt : 'a option -> 'a list -> 'a list =
fun x_opt xs ->
match x_opt with
| None -> xs
| Some x -> x :: xs

(** TODO move into Omd if we don't replace the html module with this one *)

(** [inline_to_plain_text il] is a string with just the textual content
of the the inline term [il]. All semantic and formatting nodes are ignored.

This is intended for use internally, for converting inline elements which
do not support any markup, such as image labels. *)
let inline_to_plain_text : Omd.inline -> string =
fun il ->
let buf = Buffer.create 1024 in
let rec go {Omd.il_desc; _} = match il_desc with
| Concat xs -> List.iter go xs
| Emph t | Strong t -> go t
| Link l | Image l -> go l.label
| Hard_break | Soft_break -> ()
| Code s | Html s | Text s -> Buffer.add_string buf s
in
go il;
Buffer.contents buf

let of_omd_attributes attrs =
List.map (fun (a, v) -> Html.Unsafe.string_attrib a v) attrs


(* INLINE CONVERSION *)

(* NOTE: The unfortunate duplication of inline handlers seems to be necessary
to get the Tyxml types constructed correctly. However, if you know how to
simplify, please help! *)
(* TODO Support verified html (instead of using Html.Unsafe.data) ?*)
let rec of_inline : Omd.inline -> Html_types.phrasing Html.elt list =
fun {il_attributes; il_desc} ->
let attrs = of_omd_attributes il_attributes in
match il_desc with
| Html raw -> Html.Unsafe.[data raw]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is "correct" as long as the HTML is textual. As soon as you start using the non-textual Tyxml backends (for instance, Tyxml_js`), this is broken and you have to do more work.

Long term, I think it would be beneficial to provide a functor over the HTML module to adapt to different Tyxml instantiations (and provide a pre-applied instance for text!). The "right" solution is then to decode the HTML with lambdasoup and build actual tyxml trees. Then you can build DOM trees from markdown directly, without going through text.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure whether we want this generalization as part of the current PR, or as a followup, but I agree we should target it in the long run.

I think we first need to settle settle whether or not we can replace the bespoke html generator with Tyxml.

| Code c -> Html.[code ~a:attrs [txt c]]
| Emph e -> Html.[em ~a:attrs (of_inline e)]
| Hard_break -> Html.[br ~a:attrs ()]
| Soft_break -> Html.[txt "\n"]
| Strong s -> Html.[strong ~a:attrs (of_inline s)]
| Text t -> Html.[txt t]
| Concat ls -> List.concat_map of_inline ls
| Link l -> [of_link attrs l]
| Image img -> [(of_img attrs img :> Html_types.phrasing Html.elt)]

and of_def_term : Omd.inline -> Html_types.dt_content Html.elt list =
fun {il_desc; il_attributes} ->
let attrs = of_omd_attributes il_attributes in
match il_desc with
| Html raw -> Html.Unsafe.[data raw]
| Code c -> Html.[code ~a:attrs [txt c]]
| Emph e -> Html.[em ~a:attrs (of_inline e)]
| Hard_break -> Html.[br ~a:attrs ()]
| Soft_break -> Html.[txt "\n"]
| Strong s -> Html.[strong ~a:attrs (of_inline s)]
| Text t -> Html.[txt t]
| Concat ls -> List.concat_map of_def_term ls
| Link l -> [(of_link attrs l :> Html_types.dt_content Html.elt)]
| Image img -> [(of_img attrs img :> Html_types.dt_content Html.elt)]

and of_link_label : Omd.inline -> Html_types.phrasing_without_interactive Html.elt list =
fun {il_desc; il_attributes} ->
let attrs = of_omd_attributes il_attributes in
match il_desc with
| Code c -> Html.[code ~a:attrs [txt c]]
| Emph e -> Html.[em ~a:attrs (of_link_label e)]
| Strong s -> Html.[strong ~a:attrs (of_link_label s)]
| Text t -> Html.[txt t]
| Concat ls -> List.concat_map of_link_label ls
| Image img -> [(of_img attrs img :> Html_types.phrasing_without_interactive Html.elt)]
(* We ignore any elements that shouldn't be included in link labels. *)
| _ -> []

and of_link attrs (l : Omd.link) =
let escaped_url = Omd.Internal.escape_uri l.destination in
let attrs =
let url = Html.a_href escaped_url in
let title = Option.map Html.a_title l.title in
(* The url goes before the title to match the order in the spec.txt *)
url :: cons_opt (title) attrs
in
Html.(a ~a:attrs (of_link_label l.label))

and of_img attrs (img : Omd.link) =
let escaped_url = Omd.Internal.escape_uri img.destination in
let attrs = cons_opt (Option.map Html.a_title img.title) attrs in
let alt = inline_to_plain_text img.label in
Html.(img ~src:escaped_url ~alt ~a:attrs ())


(* BLOCK CONVERSION *)

let of_heading n attrs content =
let ctr =
let open Html in
match n with
| 1 -> h1
| 2 -> h2
| 3 -> h3
| 4 -> h4
| 5 -> h5
| 6 -> h6
| _ -> p (* See ATX Headings in the tests/spec.txt *)
in
ctr ~a:attrs (of_inline content)

let of_code_block src attrs content =
let src_attr = match src with
| "" -> []
| _ -> [Html.a_class ["language-" ^ src]]
in
Html.(pre ~a:attrs [code ~a:src_attr [txt content]])

let rec of_list (typ : Omd.list_type) (spacing : Omd.list_spacing) items =
let of_list_block (bl : Omd.block) : Html_types.li_content Html.elt list =
match bl.bl_desc, spacing with
| Paragraph il, Tight -> (of_def_term il :> Html_types.li_content_fun Html.elt list)
| _ -> [of_block bl]
in
let to_list_item i = List.concat_map of_list_block i |> Html.li in
let to_list_element =
match typ with
| Ordered (start, _) -> Html.ol ~a:(if start <> 1 then [Html.a_start start] else [])
| Bullet _ -> Html.ul ~a:[]
in
items
|> List.map to_list_item
|> to_list_element

and of_definition_list defs =
let entry ({term; defs} : Omd.def_elt) =
(* "The term — word or phrase — defined in a definition." *)
let definiendum = Html.dt (of_def_term term) in
(* "The words or phrases that define the definiendum in a definition." *)
let definientia = List.map (fun d -> Html.dd (of_def_term d)) defs in
definiendum :: definientia
in
Html.dl (List.concat_map entry defs)

and of_block : Omd.block -> Html_types.flow5 Html.elt =
fun block ->
let attrs = of_omd_attributes block.bl_attributes in
match block.bl_desc with
| Paragraph content -> Html.p (of_inline content)
| Blockquote content -> Html.blockquote (List.map of_block content)
| Thematic_break -> Html.hr ()
| Html_block html -> Html.Unsafe.data html
| List (typ, spacing, items) -> of_list typ spacing items
| Heading (n, content) -> of_heading n attrs content
| Code_block (src, code) -> of_code_block src attrs code
| Definition_list content -> of_definition_list content


(* API *)

let of_fragment : Omd.doc -> Html_types.flow5 Html.elt list =
fun omd -> List.map of_block omd

let of_doc ?(title="") : Omd.doc -> Tyxml.Html.doc =
fun omd ->
let title' = title in
let body' = of_fragment omd in
let open Html in
html
(head (title (txt title')) [])
(body body')
13 changes: 13 additions & 0 deletions omd_tyxml/omd_tyxml.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
(** Convert values of type {!type:Omd.doc} to values of type {!type:Tyxml.Html.doc} *)

(** [of_doc doc] is a {{:https://ocsigen.org/tyxml} Tyxml} document
representation markdown data [doc] as statically validated
{{:https://ocsigen.org/tyxml/latest/api/Html_sigs.T#TYPEdoc} HTML document}. *)
val of_doc : ?title:string -> Omd.doc -> Tyxml.Html.doc

(** [of_fragment omd] is a {{:https://ocsigen.org/tyxml} Tyxml} representation
of the
{{:https://www.w3.org/TR/2011/WD-html5-20110525/content-models.html#flow-content}
flow} elements corresponding to the a given [omd]. This is useful when [omd]
is a fragment rather than a standalone document. *)
val of_fragment : Omd.doc -> Html_types.flow5 Tyxml.Html.elt list
2 changes: 2 additions & 0 deletions src/html.mli
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ type t =
| Null
| Concat of t * t

val escape_uri : string -> string

val of_doc : block list -> t

val to_string : t -> string
Expand Down
4 changes: 4 additions & 0 deletions src/omd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@ let to_html doc =

let to_sexp ast =
Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast)

module Internal = struct
let escape_uri = Html.escape_uri
end
6 changes: 6 additions & 0 deletions src/omd.mli
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,9 @@ val of_string: string -> doc
val to_html: doc -> string

val to_sexp: doc -> string

(* TODO rm if we can integrate Tyxml into main Omd package *)
(** Values for internal usage *)
module Internal : sig
val escape_uri : string -> string
end
4 changes: 4 additions & 0 deletions tests/common.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
let normalize_html s =
Copy link
Collaborator Author

@shonfeder shonfeder Feb 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We won't need this as a separate module if we decide to go with one package.

String.trim s
|> Soup.parse
|> Soup.pretty_print
15 changes: 12 additions & 3 deletions tests/dune
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
(executable
(name extract_tests)
(libraries str)
(libraries str common)
(modules extract_tests))

; Code shared between various parts of the testing apartus
Copy link
Collaborator Author

@shonfeder shonfeder Feb 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto re: not needing this if we go with one packages.

(library
(name common)
(libraries lambdasoup)
(modules common))

; Generate and run tests for the core omd package
(rule
(with-stdout-to
dune.inc.new
Expand All @@ -15,9 +22,11 @@

(executable
(name omd)
(libraries str omd)
(libraries str omd omd_tyxml tyxml common)
(modules omd))

; Generate the rules for diff-based tests
(rule
(alias gen)
(action (diff dune.inc dune.inc.new)))
(action
(diff dune.inc dune.inc.new)))
Loading