Added table of contents to epub.

2026-06-07 13:15:53 +02:00 · 2021-07-06 23:48:36 -05:00
parent b1e69182eb
commit 8a054b335f
6 changed files with 156 additions and 10 deletions
@@ -1,4 +1,6 @@
 from typing import Mapping
+from html.parser import HTMLParser
+import re

 from ebooklib import epub, ITEM_STYLE
 from docutils import core
@@ -40,11 +42,13 @@ def create_epub(
    style = css_template.render(use_dnd_decorations=use_dnd_decorations)
    css = epub.EpubItem(uid="style_default", file_name="style/gm_sheet.css",
                        media_type="text/css", content=style)
-    book.add_item(css)    
+    book.add_item(css)
+    toc = ["nav"]
    # Create the separate chapters
    html_chapters = []
    for chap_title, content in chapters.items():
-        chap_fname = "{}.html".format(chap_title.replace(" ", "_").lower())
+        chap_fname = chap_title.replace(" - ", "-").replace(" ", "_").lower()
+        chap_fname = "{}.html".format(chap_fname)
        chapter = epub.EpubHtml(title=chap_title,
                                file_name=chap_fname, lang="en",
                                media_type="application/xhtml+xml")
@@ -52,8 +56,10 @@ def create_epub(
        chapter.add_item(css)
        book.add_item(chapter)
        html_chapters.append(chapter)
+        # Add entries for the table of contents
+        toc.append(toc_from_headings(html=content, filename=chap_fname, chapter_title=chap_title))
    # Add the table of contents
-    book.toc = html_chapters
+    book.toc = toc
    book.spine = ("nav", *html_chapters)
    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
@@ -63,6 +69,103 @@ def create_epub(
    epub.write_epub(epub_fname, book)


+class HeadingParser(HTMLParser):
+    """Collect the ``<h1>`` to ``<h6>`` headings of an HTML fragment.
+
+    ``headings`` gets a dict with "level", "id" and "title" per heading.
+    """
+    tag_re = re.compile(r"h([1-6])")
+
+    def __init__(self, *args, **kwargs):
+        self.headings = []
+        self._curr_level = None  # Level of the heading being read, if any
+        self._curr_id = None
+        self._curr_title = []  # Text chunks found inside that heading
+        super().__init__(*args, **kwargs)
+
+    def heading_level(self, tag):
+        """Return the level of a heading *tag*, or None for other tags."""
+        match = self.tag_re.fullmatch(tag)
+        return int(match.group(1)) if match else None
+
+    def handle_starttag(self, tag, attrs):
+        this_level = self.heading_level(tag)
+        if this_level is not None:
+            self._curr_level = this_level
+            self._curr_id = dict(attrs).get('id')
+            self._curr_title = []
+
+    def handle_endtag(self, tag):
+        this_level = self.heading_level(tag)
+        if this_level is not None and this_level == self._curr_level:
+            heading = {"level": this_level, "id": self._curr_id,
+                       "title": "".join(self._curr_title)}
+            self.headings.append(heading)
+            # Stop reading, so text after the heading is not taken as a title
+            self._curr_level = None
+
+    def handle_data(self, data):
+        # Inline markup splits the text into chunks, so keep them all
+        if self._curr_level is not None:
+            self._curr_title.append(data)
+
+
+def toc_from_headings(html: str, filename: str = "",
+                      chapter_title: str = "Sheet") -> "epub.Link | tuple":
+    """Accept a chapter of HTML, and extract a table of contents segment.
+
+    Parameters
+    ----------
+    html
+      The HTML block to be parsed.
+    filename
+      The name of this file to be used for hrefs. E.g.
+      "index.html#heading_1".
+    chapter_title
+      The title of the entry for the chapter as a whole.
+
+    Returns
+    -------
+    toc
+      An ``epub.Link`` if *html* has no headings. Otherwise a nested
+      ``(epub.Section, children)`` tuple, where each child is either
+      an ``epub.Link`` or another such tuple.
+
+    """
+    # Parse the HTML
+    parser = HeadingParser()
+    parser.feed(html)
+    headings = parser.headings
+    if not headings:
+        # No headings found, so just the chapter link
+        return epub.Link(href=filename, title=chapter_title, uid=filename)
+    # Add a section for the chapter as a whole
+    toc = (epub.Section(href=filename, title=chapter_title), [])
+    # Open branches as (level, children); the chapter is never closed
+    open_branches = [(0, toc[1])]
+    for idx, heading in enumerate(headings):
+        level = heading['level']
+        # Close the branches this heading cannot be nested in. Comparing
+        # levels (not counting pops) keeps skipped levels from emptying
+        # the stack, e.g. h1, h3, h1.
+        while len(open_branches) > 1 and open_branches[-1][0] >= level:
+            open_branches.pop()
+        siblings = open_branches[-1][1]
+        # A heading without an id has no anchor, so link to the file
+        href = f"{filename}#{heading['id']}" if heading['id'] else filename
+        # Add a branch if deeper headings follow, otherwise a leaf
+        is_last = idx == len(headings) - 1
+        if is_last or level >= headings[idx + 1]['level']:
+            siblings.append(
+                epub.Link(href=href, title=heading['title'], uid=href))
+        else:
+            children = []
+            siblings.append(
+                (epub.Section(href=href, title=heading['title']), children))
+            open_branches.append((level, children))
+    return toc
+
+
 def html_parts(
    input_string,
    source_path=None,
@@ -37,3 +37,6 @@ div.system-message {
  border-width: 2px;
  color: red;
 }
+.literal {
+    font-family: monospace;
+}
@@ -1,10 +1,10 @@
 <h1 id="gm-monsters">Monsters</h1>

 [% for monster in monsters|sort(attribute='name') %]
-<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h1>
+<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h2>

 [% if monster.description %]  
-<h3>[[ monster.description ]]</h2>
+<h3>[[ monster.description ]]</h3>
 [% endif %]

 <!-- Basic properties -->
@@ -279,7 +279,7 @@ def make_gm_sheet(
    gm_props.pop("sheet_type")
    if len(gm_props.keys()) > 0:
        msg = f"Unhandled attributes in '{str(gm_file)}': {','.join(gm_props.keys())}"
-        log.warn(msg)
+        log.warning(msg)
        warnings.warn(msg)
    # Produce the combined output depending on the format requested
    if output_format == "pdf":
@@ -0,0 +1,40 @@
+from unittest import TestCase
+
+from ebooklib import epub
+
+
+from dungeonsheets.epub import toc_from_headings
+
+
+class TOCTestCase(TestCase):
+    """Check the structure returned by ``toc_from_headings``."""
+
+    def test_toc_from_no_headings(self):
+        toc = toc_from_headings('<p>Hello, world</p>')
+        self.assertIsInstance(toc, epub.Link)
+
+    def test_toc_from_single_heading(self):
+        toc = toc_from_headings('<h1 id="hello_world">Hello, world</h1>')
+        self.assertIsInstance(toc, tuple)
+        self.assertIsInstance(toc[0], epub.Section)
+        self.assertIsInstance(toc[1], list)
+
+    def test_toc_from_heading_tree(self):
+        html = ('<h1 id="other_world">Other, world</h1>'
+                '<h2 id="other_country">Other, country</h2>'
+                '<h1 id="hello_world">Hello, world</h1>'
+                '<h2 id="hello_country">Hello, country</h2>'
+                '<h2 id="goodbye_country">Goodbye, country</h2>'
+                '<h3 id="hello_city">Hello, city</h3>'
+                '<h1 id="whatever">Whatever</h1>')
+        heading_toc = toc_from_headings(html)[1]
+        self.assertIsInstance(heading_toc, list)
+        # Headings with deeper headings below them become sections
+        self.assertIsInstance(heading_toc[0][0], epub.Section)
+        self.assertEqual(heading_toc[0][0].title, "Other, world")
+        self.assertEqual(heading_toc[1][1][1][0].title, "Goodbye, country")
+        # Headings without deeper headings below them become links
+        self.assertIsInstance(heading_toc[2], epub.Link)
+        self.assertEqual(heading_toc[2].title, "Whatever")
+        self.assertIsInstance(heading_toc[1][1][0], epub.Link)
+        self.assertEqual(heading_toc[1][1][0].title, "Hello, country")
@@ -55,10 +55,10 @@ class MakeSheetsTestCase(unittest.TestCase):
 class EpubOutputTestCase(unittest.TestCase):
    gm_epub = Path(f"{GMFILE.stem}.epub").resolve()

-    # def tearDown(self):
-    #     for f in [self.gm_epub]:
-    #         if f.exists():
-    #             f.unlink()
+    def tearDown(self):
+        # Remove the epub file left behind by a test, if there is one
+        if self.gm_epub.exists():
+            self.gm_epub.unlink()

    def test_file_created(self):
        # Check that a file is created once the function is run