From cbb23240143571fe65a11211240c94726427783e Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Thu, 31 Oct 2024 09:28:39 +0100 Subject: [PATCH 1/7] docs: script to convert HTML manual pages to markdown Script to convert recursively all .html files to .md (GitHub flavoured Markdown). (see related #3849) --- utils/grass_html2md.sh | 42 ++++++++++++++++++++++++++++++++++++++ utils/pandoc_codeblock.lua | 8 ++++++++ 2 files changed, 50 insertions(+) create mode 100755 utils/grass_html2md.sh create mode 100644 utils/pandoc_codeblock.lua diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh new file mode 100755 index 00000000000..f09695f4078 --- /dev/null +++ b/utils/grass_html2md.sh @@ -0,0 +1,42 @@ +#!/bin/sh +set -eu + +############################################################################### +# Convert recursively all .html files to .md (GitHub flavoured Markdown) +# +# Dependencies: +# pandoc +# wget +# +# Author(s): +# Martin Landa, Markus Neteler +# +# Usage: +# If you have "pandoc" in PATH, execute for HTML file conversion in +# current directory and subdirectories: +# ./utils/grass_html2md.sh +# +# COPYRIGHT: (C) 2024 by the GRASS Development Team +# +# This program is free software under the GNU General Public +# License (>=v2). Read the file COPYING that comes with GRASS +# for details. +# +############################################################################### + +# define $TMP if not present +if test -z "${TMP}" ; then + TMP="/tmp" +fi + +# TODO: path to LUA file setting to be improved (./utils/pandoc_codeblock.lua) +#wget https://raw.githubusercontent.com/OSGeo/grass/refs/heads/main/utils/pandoc_codeblock.lua -O "${TMP}/pandoc_codeblock.lua" +TMP="utils" + +# run recursively: HTML to MD +for f in `find . -name *.html`; do + echo "${f}" + cat "${f}" | sed 's#
#
#g' | sed 's#
##g' | pandoc \ + --from=html --to=markdown -t gfm --lua-filter "${TMP}/pandoc_codeblock.lua" | \ + sed 's+ $++g' | sed 's+\.html)+\.md)+g' > "${f%%.html}.md" +done diff --git a/utils/pandoc_codeblock.lua b/utils/pandoc_codeblock.lua new file mode 100644 index 00000000000..ebdce56c2d3 --- /dev/null +++ b/utils/pandoc_codeblock.lua @@ -0,0 +1,8 @@ +-- Pandoc Lua filter to handle code blocks +-- Test cases +-- raster/r.sun/r.sun.html + +-- Function to convert code blocks to markdown +function CodeBlock (cb) + return pandoc.RawBlock('markdown', '```bash\n' .. cb.text .. '\n```\n') +end From 023f9435dd696b4315c55fdf71a3e7b122211545 Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Fri, 15 Nov 2024 13:18:02 +0100 Subject: [PATCH 2/7] HTML: Process the tmp file to selectively replace .html with .md only in relative URLs; simplify path to utils --- utils/grass_html2md.sh | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh index f09695f4078..3d6609787e4 100755 --- a/utils/grass_html2md.sh +++ b/utils/grass_html2md.sh @@ -24,19 +24,26 @@ set -eu # ############################################################################### -# define $TMP if not present -if test -z "${TMP}" ; then - TMP="/tmp" -fi - -# TODO: path to LUA file setting to be improved (./utils/pandoc_codeblock.lua) -#wget https://raw.githubusercontent.com/OSGeo/grass/refs/heads/main/utils/pandoc_codeblock.lua -O "${TMP}/pandoc_codeblock.lua" -TMP="utils" +# path to LUA file (./utils/pandoc_codeblock.lua) +UTILSPATH="utils" # run recursively: HTML to MD -for f in `find . -name *.html`; do +for f in $(find . -name *.html); do echo "${f}" - cat "${f}" | sed 's#
#
#g' | sed 's#
##g' | pandoc \ - --from=html --to=markdown -t gfm --lua-filter "${TMP}/pandoc_codeblock.lua" | \ - sed 's+ $++g' | sed 's+\.html)+\.md)+g' > "${f%%.html}.md" + + # HTML: Process the tmp file to selectively replace .html with .md only in relative URLs + sed -E ' + # Step 1: Preserve https or http URLs with .html + s|(|\1_KEEPHTML">|g; + # Step 2: Replace .html with .md for other links + s|(|\1\2.md">|g; + # Step 3: Restore preserved https or http URLs + s|_KEEPHTML">|">|g; +' "${f%%.html}.html" > "${f%%.html}_tmp.html" + + cat "${f%%.html}_tmp.html" | sed 's#
#
#g' | sed 's#
##g' | pandoc \ + --from=html --to=markdown -t gfm --lua-filter "${UTILSPATH}/pandoc_codeblock.lua" > "${f%%.html}.md" + + rm -f "${f%%.html}_tmp.html" + done From 8ccc15dd83992bcf9dc00e524dd355a69de22cf9 Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Sun, 24 Nov 2024 14:08:21 +0100 Subject: [PATCH 3/7] added signal handler to cleanup at user break --- utils/grass_html2md.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh index 3d6609787e4..90c725def6d 100755 --- a/utils/grass_html2md.sh +++ b/utils/grass_html2md.sh @@ -24,6 +24,22 @@ set -eu # ############################################################################### +# cleanup at user break +cleanup() +{ + rm -f "${f%%.html}_tmp.html" +} + +# what to do in case of user break: +exitprocedure() +{ + echo "User break!" + cleanup + exit 1 +} +# shell check for user break (signal list: trap -l) +trap "exitprocedure" 2 3 15 + # path to LUA file (./utils/pandoc_codeblock.lua) UTILSPATH="utils" From 80c5ce55fbbb8570e24ece75e381995c518e8634 Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Sun, 24 Nov 2024 18:04:00 +0100 Subject: [PATCH 4/7] switch to bash --- utils/grass_html2md.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh index 90c725def6d..30b04efde9d 100755 --- a/utils/grass_html2md.sh +++ b/utils/grass_html2md.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash set -eu ############################################################################### From f91a11160e99b3bcc9c5df67311854c245e95354 Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Sun, 24 Nov 2024 18:04:18 +0100 Subject: [PATCH 5/7] un-escape in text (e.g. in grass.html#examples) --- utils/grass_html2md.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh index 30b04efde9d..cc8f89e33fd 100755 --- a/utils/grass_html2md.sh +++ b/utils/grass_html2md.sh @@ -57,8 +57,12 @@ for f in $(find . -name *.html); do s|_KEEPHTML">|">|g; ' "${f%%.html}.html" > "${f%%.html}_tmp.html" - cat "${f%%.html}_tmp.html" | sed 's#
#
#g' | sed 's#
##g' | pandoc \ - --from=html --to=markdown -t gfm --lua-filter "${UTILSPATH}/pandoc_codeblock.lua" > "${f%%.html}.md" + cat "${f%%.html}_tmp.html" | \ + sed 's#
#
#g' | \
+        sed 's#
##g' | \ + pandoc --from=html --to=markdown -t gfm \ + --lua-filter "${UTILSPATH}/pandoc_codeblock.lua" | \ + sed 's+ \\\$+ \$+g' > "${f%%.html}.md" rm -f "${f%%.html}_tmp.html" From dcf157266e3d607202bec967e29453502f890a82 Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Fri, 6 Dec 2024 00:19:40 +0100 Subject: [PATCH 6/7] grass_html2md.sh: also convert relative URLs with #anchor but keep full URLs as before; fix %20 to dash for mkdocs --- utils/grass_html2md.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh index cc8f89e33fd..9ac2e548ae1 100755 --- a/utils/grass_html2md.sh +++ b/utils/grass_html2md.sh @@ -49,12 +49,12 @@ for f in $(find . -name *.html); do # HTML: Process the tmp file to selectively replace .html with .md only in relative URLs sed -E ' - # Step 1: Preserve https or http URLs with .html - s|(
|\1_KEEPHTML">|g; - # Step 2: Replace .html with .md for other links - s|(|\1\2.md">|g; - # Step 3: Restore preserved https or http URLs - s|_KEEPHTML">|">|g; + # Step 1: Preserve http/https links with .html (and optional anchors) + s|(|\1_KEEPHTML\2">|g; + # Step 2: Replace .html with .md for local links (with or without anchors) + s|(|\1\2.md\3">|g; + # Step 3: Restore preserved http/https links with .html + s|_KEEPHTML||g; ' "${f%%.html}.html" > "${f%%.html}_tmp.html" cat "${f%%.html}_tmp.html" | \ @@ -62,7 +62,7 @@ for f in $(find . -name *.html); do sed 's###g' | \ pandoc --from=html --to=markdown -t gfm \ --lua-filter "${UTILSPATH}/pandoc_codeblock.lua" | \ - sed 's+ \\\$+ \$+g' > "${f%%.html}.md" + sed 's+ \\\$+ \$+g' | sed 's+%20+-+g' > "${f%%.html}.md" rm -f "${f%%.html}_tmp.html" From 8f043366aa1f1dacdd4281f7328193736af4026b Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Sun, 22 Dec 2024 12:42:09 +0100 Subject: [PATCH 7/7] change markdown fenced code blocks from bash to shell --- utils/pandoc_codeblock.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/pandoc_codeblock.lua b/utils/pandoc_codeblock.lua index ebdce56c2d3..e2a0a54910f 100644 --- a/utils/pandoc_codeblock.lua +++ b/utils/pandoc_codeblock.lua @@ -4,5 +4,5 @@ -- Function to convert code blocks to markdown function CodeBlock (cb) - return pandoc.RawBlock('markdown', '```bash\n' .. cb.text .. '\n```\n') + return pandoc.RawBlock('markdown', '```shell\n' .. cb.text .. '\n```\n') end