TEMP: add rst-conversion scripts

This is just a temporary commit, adding the scripts that perform the automated conversion of the docs. The next commit contains the results of the conversion, and the commit following that removes these scripts again. To redo the conversion in the next commit, rebase interactively to edit this commit and delete the next one, and run './update-docs.sh' from the root directory. Then continue the rebasing, and resolve any conflicts that might have occurred in the manual fixes on top. Finally, build the documentation to ensure no further fixups are needed. Reviewed-by: Eric Engestrom <eric@engestrom.ch> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4630>
2019-06-10 19:08:35 +02:00 · 2019-06-10 19:08:35 +02:00 · 1df5dbf516
parent 4de678cd30
commit 1df5dbf516
2 changed files with 92 additions and 0 deletions
--- a/convert-sphinx.py
+++ b/convert-sphinx.py
@ -0,0 +1,79 @@
+import os, glob
+from bs4 import BeautifulSoup
+from subprocess import run, PIPE
+from urllib.parse import urlparse
+import dashtable
+
+def html_to_rst(input):
+    return run(['pandoc', '-f', 'html', '-t', 'rst'],
+               input=input, stdout=PIPE, universal_newlines=True).stdout
+
+def convert_toc(filename):
+    with open(filename, encoding='utf8') as input:
+        soup = BeautifulSoup(input, 'html5lib')
+        body = soup.find('body')
+        with open('./docs/contents.rst', 'w', encoding='utf-8') as output:
+            for elm in body.contents:
+                if elm.name == 'h2':
+                    output.write(""".. toctree::
+   :maxdepth: 1
+   :caption: {0}
+   :hidden:\n""".format(elm.get_text()))
+                elif elm.name == 'ul':
+                    output.write('\n')
+                    for li in elm.contents:
+                        if li.name == 'li':
+                            a = li.find('a')
+                            url = a['href']
+                            if url == 'index.html':
+                                output.write('   self\n')
+                            elif bool(urlparse(url).netloc):
+                                output.write('   {0} <{1}>\n'.format(a.get_text(), url))
+                            else:
+                                output.write('   {0}\n'.format(url[:-5]))
+                    output.write('\n')
+                elif elm.name == 'dl':
+                    a = elm.find('a')
+                    output.write('\n   {0} <{1}>\n'.format(a.get_text(), url))
+                elif hasattr(elm, 'contents'):
+                    print('**** UNKNOWN: ' + str(elm))
+                    exit(1)
+    print("SUCCESS: " + filename)
+
+def convert_article(filename):
+    with open(filename, encoding='utf8') as input:
+        soup = BeautifulSoup(input, 'html5lib')
+
+        table = None
+        if filename == './docs/release-calendar.html':
+            table = dashtable.html2rst(str(soup.table.extract()))
+
+        content = soup.find('div', 'content')
+        content = ''.join(map(str, content.contents))
+        content = html_to_rst(str(content))
+
+        if table:
+            content = '\n'.join([content, table, ''])
+
+        with open(os.path.splitext(filename)[0]+'.rst', 'w', encoding='utf-8') as output:
+            output.write(str(content))
+            if filename == './docs/relnotes.html':
+                output.write("""\n.. toctree::
+   :maxdepth: 1
+   :hidden:\n""")
+                output.write('\n')
+                for li in soup.findAll('li'):
+                    a = li.find('a')
+                    url = a['href']
+                    split = os.path.splitext(url)
+                    if split[1] == '.html':
+                        output.write('   {0}\n'.format(split[0]))
+                output.write('   Older Versions <versions>\n')
+
+    print("SUCCESS: " + filename)
+
+for filename in glob.iglob('./docs/**/*.html', recursive=True):
+    if filename == './docs/contents.html':
+        convert_toc(filename)
+    else:
+        convert_article(filename)
--- a/update-docs.sh
+++ b/update-docs.sh
@ -0,0 +1,13 @@
+#!/bin/sh
+
+# Regenerate the reStructuredText docs and commit the result:
+#   1. run the HTML -> reST converter,
+#   2. stage the .rst files and remove the .html sources,
+#   3. commit with a fixed message.
+# The steps are chained with && so a failure at any step stops the chain
+# before the commit. The patterns are quoted so that git, not the shell,
+# interprets them.
+python3 ./convert-sphinx.py &&
+git add "docs/**.rst" && git rm "docs/**.html" &&
+git commit -am "docs: convert articles to reStructuredText
+
+This uses the previously added scripts to convert the documentation to
+reStructuredText, which is both easier to read offline, and can be used
+to generate modern HTML for online documentation.
+
+No modifications to the generated results have been made.
+
+Acked-by: Eric Engestrom <eric@engestrom.ch>"