From 8a054b335f9a57d1ac9a9b9aa34bd4be90c52fcf Mon Sep 17 00:00:00 2001
From: Mark Wolfman <canismarko@gmail.com>
Date: Tue, 6 Jul 2021 23:48:36 -0500
Subject: [PATCH] Added table of contents to epub.

---
 dungeonsheets/epub.py                      | 109 ++++++++++++++++++++-
 dungeonsheets/forms/dungeonsheets_epub.css |   3 +
 dungeonsheets/forms/monsters_template.html |   4 +-
 dungeonsheets/make_sheets.py               |   2 +-
 tests/test_epub.py                         |  40 ++++++++
 tests/test_make_sheets.py                  |   8 +-
 6 files changed, 156 insertions(+), 10 deletions(-)
 create mode 100644 tests/test_epub.py

diff --git a/dungeonsheets/epub.py b/dungeonsheets/epub.py
index a1c6217..7191ee4 100644
--- a/dungeonsheets/epub.py
+++ b/dungeonsheets/epub.py
@@ -1,4 +1,6 @@
 from typing import Mapping
+from html.parser import HTMLParser
+import re
 
 from ebooklib import epub, ITEM_STYLE
 from docutils import core
@@ -40,11 +42,13 @@ def create_epub(
     style = css_template.render(use_dnd_decorations=use_dnd_decorations)
     css = epub.EpubItem(uid="style_default", file_name="style/gm_sheet.css",
                         media_type="text/css", content=style)
-    book.add_item(css)    
+    book.add_item(css)
+    toc = ["nav"]
     # Create the separate chapters
     html_chapters = []
     for chap_title, content in chapters.items():
-        chap_fname = "{}.html".format(chap_title.replace(" ", "_").lower())
+        chap_fname = chap_title.replace(" - ", "-").replace(" ", "_").lower()
+        chap_fname = "{}.html".format(chap_fname)
         chapter = epub.EpubHtml(title=chap_title,
                                 file_name=chap_fname, lang="en",
                                 media_type="application/xhtml+xml")
@@ -52,8 +56,10 @@ def create_epub(
         chapter.add_item(css)
         book.add_item(chapter)
         html_chapters.append(chapter)
+        # Add entries for the table of contents
+        toc.append(toc_from_headings(html=content, filename=chap_fname, chapter_title=chap_title))
     # Add the table of contents
-    book.toc = html_chapters
+    book.toc = toc
     book.spine = ("nav", *html_chapters)
     # add default NCX and Nav file
     book.add_item(epub.EpubNcx())
@@ -63,6 +69,103 @@ def create_epub(
     epub.write_epub(epub_fname, book)
 
 
+class HeadingParser(HTMLParser):
+    """Collect ``{"level", "id", "title"}`` dicts for each <hN> in fed HTML."""
+    tag_re = re.compile(r"h(\d+)")
+    _curr_level = None  # Level of the heading being read, else None
+    _curr_id = None  # ``id`` attribute of the heading being read
+    _curr_title = None  # Text gathered so far for the open heading
+
+    def __init__(self, *args, **kwargs):
+        self.headings = []
+        super().__init__(*args, **kwargs)
+
+    def heading_level(self, tag):
+        """Return N for an ``hN`` tag name, or None for any other tag."""
+        match = self.tag_re.fullmatch(tag)
+        return int(match.group(1)) if match else None
+
+    def handle_starttag(self, tag, attrs):
+        this_level = self.heading_level(tag)
+        if this_level is not None:
+            # Found a heading, so start a fresh record for it
+            self._curr_level = this_level
+            self._curr_id = dict(attrs).get('id')
+            self._curr_title = ""
+
+    def handle_endtag(self, tag):
+        this_level = self.heading_level(tag)
+        if this_level is not None and this_level == self._curr_level:
+            self.headings.append({
+                "level": this_level,
+                "id": self._curr_id,
+                "title": self._curr_title,
+            })
+            # Reset so later text is not mistaken for heading text
+            self._curr_level = self._curr_id = self._curr_title = None
+
+    def handle_data(self, data):
+        # Join every text node so inline markup keeps the whole title
+        if self._curr_level is not None:
+            self._curr_title += data
+
+
+def toc_from_headings(html: str, filename: str = "", chapter_title: str = "Sheet") -> list:
+    """Accept a chapter of HTML, and extract a table of contents segment.
+
+    Parameters
+    ----------
+    html
+      The HTML block to be parsed.
+    filename
+      The name of this file to be used for hrefs. E.g.
+      "index.html#heading_1".
+    chapter_title
+      Title for the entry that represents the chapter as a whole.
+
+    Returns
+    -------
+    toc
+      A lone ``epub.Link`` if *html* has no headings, otherwise a
+      ``(epub.Section, children)`` tuple in which each child is an
+      ``epub.Link`` or another such tuple.
+
+    """
+    # Parse the HTML
+    parser = HeadingParser()
+    parser.feed(html)
+    headings = parser.headings
+    if not headings:
+        # No headings found, so just the chapter link
+        return epub.Link(href=filename, title=chapter_title, uid=filename)
+    # Add a section for the chapter as a whole
+    toc = (epub.Section(href=filename, title=chapter_title), [])
+    # Stack of (level, children) for open sections; the chapter root
+    # sits at level -1 so that no real heading can ever pop it.
+    open_sections = [(-1, toc[1])]
+    for idx, heading in enumerate(headings):
+        level, title = heading['level'], heading['title']
+        # Headings without an id can only point at the chapter itself
+        anchor = heading['id']
+        href = filename if anchor is None else f"{filename}#{anchor}"
+        # Close sections that are not ancestors of this heading. Going
+        # by recorded levels (not level differences) keeps the stack
+        # valid when levels are skipped, e.g. h1 -> h3 -> h1.
+        while open_sections[-1][0] >= level:
+            open_sections.pop()
+        siblings = open_sections[-1][1]
+        # A heading is a branch only if the next one is nested deeper
+        is_last = idx == (len(headings) - 1)
+        if is_last or level >= headings[idx + 1]['level']:
+            siblings.append(epub.Link(href=href, title=title, uid=href))
+        else:
+            children = []
+            siblings.append((epub.Section(href=href, title=title), children))
+            open_sections.append((level, children))
+
+    return toc
+
+
 def html_parts(
     input_string,
     source_path=None,
diff --git a/dungeonsheets/forms/dungeonsheets_epub.css b/dungeonsheets/forms/dungeonsheets_epub.css
index 08af226..2827fcc 100644
--- a/dungeonsheets/forms/dungeonsheets_epub.css
+++ b/dungeonsheets/forms/dungeonsheets_epub.css
@@ -37,3 +37,6 @@ div.system-message {
   border-width: 2px;
   color: red;
 }
+.literal {
+    font-family: monospace;
+}
diff --git a/dungeonsheets/forms/monsters_template.html b/dungeonsheets/forms/monsters_template.html
index fa59ca6..929afea 100644
--- a/dungeonsheets/forms/monsters_template.html
+++ b/dungeonsheets/forms/monsters_template.html
@@ -1,10 +1,10 @@
 <h1 id="gm-monsters">Monsters</h1>
 
 [% for monster in monsters|sort(attribute='name') %]
-<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h1>
+<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h2>
 
 [% if monster.description %]  
-<h3>[[ monster.description ]]</h2>
+<h3>[[ monster.description ]]</h3>
 [% endif %]
 
 <!-- Basic properties -->
diff --git a/dungeonsheets/make_sheets.py b/dungeonsheets/make_sheets.py
index 49818a0..b3f8a4a 100755
--- a/dungeonsheets/make_sheets.py
+++ b/dungeonsheets/make_sheets.py
@@ -279,7 +279,7 @@ def make_gm_sheet(
     gm_props.pop("sheet_type")
     if len(gm_props.keys()) > 0:
         msg = f"Unhandled attributes in '{str(gm_file)}': {','.join(gm_props.keys())}"
-        log.warn(msg)
+        log.warning(msg)
         warnings.warn(msg)
     # Produce the combined output depending on the format requested
     if output_format == "pdf":
diff --git a/tests/test_epub.py b/tests/test_epub.py
new file mode 100644
index 0000000..2fa99c3
--- /dev/null
+++ b/tests/test_epub.py
@@ -0,0 +1,40 @@
+from unittest import TestCase
+
+from ebooklib import epub
+
+
+from dungeonsheets.epub import toc_from_headings
+
+
+class TOCTestCase(TestCase):
+    """Check the table-of-contents structure built from HTML headings."""
+    def test_toc_from_no_headings(self):
+        toc = toc_from_headings('<p>Hello, world</p>')
+        self.assertIsInstance(toc, epub.Link)
+
+    def test_toc_from_single_heading(self):
+        toc = toc_from_headings('<h1 id="hello_world">Hello, world</h1>')
+        self.assertIsInstance(toc, tuple)
+        self.assertIsInstance(toc[0], epub.Section)
+        self.assertIsInstance(toc[1], list)
+
+    def test_toc_from_heading_tree(self):
+        html = ('<h1 id="other_world">Other, world</h1>'
+                '<h2 id="other_country">Other, country</h2>'
+                '<h1 id="hello_world">Hello, world</h1>'
+                '<h2 id="hello_country">Hello, country</h2>'
+                '<h2 id="goodbye_country">Goodbye, country</h2>'
+                '<h3 id="hello_city">Hello, city</h3>'
+                '<h1 id="whatever">Whatever</h1>'
+                )
+        heading_toc = toc_from_headings(html)[1]
+        self.assertIsInstance(heading_toc, list)
+        # Headings with sub-headings become (Section, children) pairs
+        self.assertIsInstance(heading_toc[0][0], epub.Section)
+        self.assertEqual(heading_toc[0][0].title, "Other, world")
+        # Headings with nothing beneath them become plain links
+        self.assertIsInstance(heading_toc[2], epub.Link)
+        self.assertEqual(heading_toc[2].title, "Whatever")
+        self.assertIsInstance(heading_toc[1][1][0], epub.Link)
+        self.assertEqual(heading_toc[1][1][0].title, "Hello, country")
+        self.assertEqual(heading_toc[1][1][1][1][0].title, "Hello, city")
diff --git a/tests/test_make_sheets.py b/tests/test_make_sheets.py
index 682de02..a3d8227 100644
--- a/tests/test_make_sheets.py
+++ b/tests/test_make_sheets.py
@@ -55,10 +55,10 @@ class MakeSheetsTestCase(unittest.TestCase):
 class EpubOutputTestCase(unittest.TestCase):
     gm_epub = Path(f"{GMFILE.stem}.epub").resolve()
 
-    # def tearDown(self):
-    #     for f in [self.gm_epub]:
-    #         if f.exists():
-    #             f.unlink()
+    def tearDown(self):
+        for f in [self.gm_epub]:
+            if f.exists():
+                f.unlink()
 
     def test_file_created(self):
         # Check that a file is created once the function is run