Added table of contents to epub.

This commit is contained in:
Mark Wolfman
2021-07-06 23:48:36 -05:00
parent b1e69182eb
commit 8a054b335f
6 changed files with 156 additions and 10 deletions
+106 -3
View File
@@ -1,4 +1,6 @@
from typing import Mapping
from html.parser import HTMLParser
import re
from ebooklib import epub, ITEM_STYLE
from docutils import core
@@ -40,11 +42,13 @@ def create_epub(
style = css_template.render(use_dnd_decorations=use_dnd_decorations)
css = epub.EpubItem(uid="style_default", file_name="style/gm_sheet.css",
media_type="text/css", content=style)
book.add_item(css)
book.add_item(css)
toc = ["nav"]
# Create the separate chapters
html_chapters = []
for chap_title, content in chapters.items():
chap_fname = "{}.html".format(chap_title.replace(" ", "_").lower())
chap_fname = chap_title.replace(" - ", "-").replace(" ", "_").lower()
chap_fname = "{}.html".format(chap_fname)
chapter = epub.EpubHtml(title=chap_title,
file_name=chap_fname, lang="en",
media_type="application/xhtml+xml")
@@ -52,8 +56,10 @@ def create_epub(
chapter.add_item(css)
book.add_item(chapter)
html_chapters.append(chapter)
# Add entries for the table of contents
toc.append(toc_from_headings(html=content, filename=chap_fname, chapter_title=chap_title))
# Add the table of contents
book.toc = html_chapters
book.toc = toc
book.spine = ("nav", *html_chapters)
# add default NCX and Nav file
book.add_item(epub.EpubNcx())
@@ -63,6 +69,103 @@ def create_epub(
epub.write_epub(epub_fname, book)
class HeadingParser(HTMLParser):
    """Collect the ``<hN>`` headings found in a block of HTML.

    After calling ``feed()``, ``self.headings`` holds one dictionary
    per heading, in document order, with the keys:

    - ``"level"``: the integer *N* of the ``<hN>`` tag,
    - ``"id"``: the value of the tag's ``id`` attribute, or ``None``,
    - ``"title"``: all text inside the heading, including text nested
      in inline markup such as ``<em>``.

    A heading is only recorded when it is closed by an end tag of the
    same level as the most recently opened heading tag.

    """
    # Raw string so that ``\d`` is a regex class, not a string escape
    tag_re = re.compile(r"h(\d+)")
    # State for the heading currently being parsed (``None`` when the
    # parser is not inside a heading)
    _curr_level = None
    _curr_id = None
    _curr_title = None

    def __init__(self, *args, **kwargs):
        self.headings = []
        # Text fragments seen so far inside the open heading
        self._title_parts = []
        super().__init__(*args, **kwargs)

    def heading_level(self, tag):
        """Return the level of a heading tag (``"h2"`` -> 2), else ``None``."""
        # fullmatch so that tags that merely start with "h<digit>"
        # (e.g. a custom element "h2o") are not mistaken for headings
        match = self.tag_re.fullmatch(tag)
        if match:
            return int(match.group(1))
        return None

    def handle_starttag(self, tag, attrs):
        this_level = self.heading_level(tag)
        if this_level is None:
            return
        # Found a heading, so start from a clean slate: nothing from
        # a previous heading may leak into this one
        self._curr_level = this_level
        self._curr_id = dict(attrs).get('id')
        self._curr_title = None
        self._title_parts = []

    def handle_endtag(self, tag):
        this_level = self.heading_level(tag)
        if this_level is None or this_level != self._curr_level:
            return
        self._curr_title = "".join(self._title_parts)
        heading = {
            "level": this_level,
            "id": self._curr_id,
            "title": self._curr_title,
        }
        self.headings.append(heading)
        # Leave the heading so that following body text is ignored
        self._curr_level = None
        self._curr_id = None
        self._title_parts = []

    def handle_data(self, data):
        # Only text inside an open heading contributes to a title.
        # Inline markup splits the text into several chunks, so
        # accumulate them instead of keeping only the last one.
        if self._curr_level is not None:
            self._title_parts.append(data)
def toc_from_headings(html: str, filename: str = "", chapter_title: str = "Sheet") -> list:
    """Accept a chapter of HTML, and extract a table of contents segment.

    Headings are nested by their level: a heading that is followed by
    a deeper heading becomes a section, any other heading becomes a
    plain link. Each heading is attached to the closest preceding
    heading of a strictly lower level, or to the chapter itself if
    there is none, so skipped levels (e.g. h1 -> h3 -> h1) and chapters
    that open with a deep heading are handled.

    Parameters
    ----------
    html
      The HTML block to be parsed.
    filename
      The name of this file to be used for hrefs. E.g.
      "index.html#heading_1".
    chapter_title
      The title used for the entry that represents the whole chapter.

    Returns
    -------
    toc
      If the HTML contains no headings, a single ``epub.Link`` to the
      chapter. Otherwise a ``(epub.Section, children)`` tuple for the
      chapter, where ``children`` is a list of ``epub.Link`` objects
      and nested ``(epub.Section, children)`` tuples.

    """
    # Parse the HTML
    parser = HeadingParser()
    parser.feed(html)
    headings = parser.headings
    if not headings:
        # No headings found, so just the chapter link
        return epub.Link(href=filename, title=chapter_title, uid=filename)
    # Add a section for the chapter as a whole
    toc = (epub.Section(href=filename, title=chapter_title), [])
    # Stack of (heading level, (section, children)) for the currently
    # open sections. The chapter sits at level 0 and is never popped.
    sections_stack = [(0, toc)]
    last_idx = len(headings) - 1
    for idx, heading in enumerate(headings):
        level = heading['level']
        # Walk back up the stack to the closest shallower section
        while len(sections_stack) > 1 and sections_stack[-1][0] >= level:
            sections_stack.pop()
        children = sections_stack[-1][1][1]
        # A heading without an id has no anchor, so link to the file
        if heading['id']:
            href = f"{filename}#{heading['id']}"
        else:
            href = filename
        # Add a leaf or branch depending on the heading structure
        is_leaf = idx == last_idx or level >= headings[idx + 1]['level']
        if is_leaf:
            children.append(epub.Link(href=href, title=heading['title'], uid=href))
        else:
            new_section = (epub.Section(href=href, title=heading['title']), [])
            children.append(new_section)
            sections_stack.append((level, new_section))
    return toc
def html_parts(
input_string,
source_path=None,
@@ -37,3 +37,6 @@ div.system-message {
border-width: 2px;
color: red;
}
.literal {
font-family: monospace;
}
+2 -2
View File
@@ -1,10 +1,10 @@
<h1 id="gm-monsters">Monsters</h1>
[% for monster in monsters|sort(attribute='name') %]
<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h1>
<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h2>
[% if monster.description %]
<h3>[[ monster.description ]]</h2>
<h3>[[ monster.description ]]</h3>
[% endif %]
<!-- Basic properties -->
+1 -1
View File
@@ -279,7 +279,7 @@ def make_gm_sheet(
gm_props.pop("sheet_type")
if len(gm_props.keys()) > 0:
msg = f"Unhandled attributes in '{str(gm_file)}': {','.join(gm_props.keys())}"
log.warn(msg)
log.warning(msg)
warnings.warn(msg)
# Produce the combined output depending on the format requested
if output_format == "pdf":
+40
View File
@@ -0,0 +1,40 @@
from unittest import TestCase
from ebooklib import epub
from dungeonsheets.epub import toc_from_headings
class TOCTestCase(TestCase):
    """Tests for building a table-of-contents segment from HTML headings."""

    def test_toc_from_no_headings(self):
        # Without headings the chapter collapses to a single link
        html = '<p>Hello, world</p>'
        toc = toc_from_headings(html)
        self.assertIsInstance(toc, epub.Link)

    def test_toc_from_single_heading(self):
        # Any heading turns the chapter into a (section, children) pair
        html = '<h1 id="hello_world">Hello, world</h1>'
        toc = toc_from_headings(html)
        self.assertIsInstance(toc, tuple)
        self.assertIsInstance(toc[0], epub.Section)
        self.assertIsInstance(toc[1], list)

    def test_toc_from_heading_tree(self):
        html = ('<h1 id="other_world">Other, world</h1>'
                '<h2 id="other_country">Other, country</h2>'
                '<h1 id="hello_world">Hello, world</h1>'
                '<h2 id="hello_country">Hello, country</h2>'
                '<h2 id="goodbye_country">Goodbye, country</h2>'
                '<h3 id="hello_city">Hello, city</h3>'
                '<h1 id="whatever">Whatever</h1>'
        )
        toc = toc_from_headings(html)
        heading_toc = toc[1]
        self.assertIsInstance(heading_toc, list)
        # A heading with sub-headings becomes a section
        self.assertIsInstance(heading_toc[0][0], epub.Section)
        self.assertEqual(heading_toc[0][0].title, "Other, world")
        # A trailing top-level heading becomes a plain link
        self.assertIsInstance(heading_toc[2], epub.Link)
        self.assertEqual(heading_toc[2].title, "Whatever")
        # Leaf headings inside a section are links
        self.assertIsInstance(heading_toc[1][1][0], epub.Link)
        self.assertEqual(heading_toc[1][1][0].title, "Hello, country")
        # A second-level heading with children is a nested section
        nested = heading_toc[1][1][1]
        self.assertIsInstance(nested, tuple)
        self.assertIsInstance(nested[0], epub.Section)
        self.assertEqual(nested[0].title, "Goodbye, country")
        self.assertIsInstance(nested[1][0], epub.Link)
        self.assertEqual(nested[1][0].title, "Hello, city")
+4 -4
View File
@@ -55,10 +55,10 @@ class MakeSheetsTestCase(unittest.TestCase):
class EpubOutputTestCase(unittest.TestCase):
gm_epub = Path(f"{GMFILE.stem}.epub").resolve()
# def tearDown(self):
# for f in [self.gm_epub]:
# if f.exists():
# f.unlink()
def tearDown(self):
for f in [self.gm_epub]:
if f.exists():
f.unlink()
def test_file_created(self):
# Check that a file is created once the function is run