mirror of
https://github.com/Threnklyn/dungeon-sheets.git
synced 2026-06-07 13:15:53 +02:00
Added table of contents to epub.
This commit is contained in:
+106
-3
@@ -1,4 +1,6 @@
|
||||
from typing import Mapping
|
||||
from html.parser import HTMLParser
|
||||
import re
|
||||
|
||||
from ebooklib import epub, ITEM_STYLE
|
||||
from docutils import core
|
||||
@@ -40,11 +42,13 @@ def create_epub(
|
||||
style = css_template.render(use_dnd_decorations=use_dnd_decorations)
|
||||
css = epub.EpubItem(uid="style_default", file_name="style/gm_sheet.css",
|
||||
media_type="text/css", content=style)
|
||||
book.add_item(css)
|
||||
book.add_item(css)
|
||||
toc = ["nav"]
|
||||
# Create the separate chapters
|
||||
html_chapters = []
|
||||
for chap_title, content in chapters.items():
|
||||
chap_fname = "{}.html".format(chap_title.replace(" ", "_").lower())
|
||||
chap_fname = chap_title.replace(" - ", "-").replace(" ", "_").lower()
|
||||
chap_fname = "{}.html".format(chap_fname)
|
||||
chapter = epub.EpubHtml(title=chap_title,
|
||||
file_name=chap_fname, lang="en",
|
||||
media_type="application/xhtml+xml")
|
||||
@@ -52,8 +56,10 @@ def create_epub(
|
||||
chapter.add_item(css)
|
||||
book.add_item(chapter)
|
||||
html_chapters.append(chapter)
|
||||
# Add entries for the table of contents
|
||||
toc.append(toc_from_headings(html=content, filename=chap_fname, chapter_title=chap_title))
|
||||
# Add the table of contents
|
||||
book.toc = html_chapters
|
||||
book.toc = toc
|
||||
book.spine = ("nav", *html_chapters)
|
||||
# add default NCX and Nav file
|
||||
book.add_item(epub.EpubNcx())
|
||||
@@ -63,6 +69,103 @@ def create_epub(
|
||||
epub.write_epub(epub_fname, book)
|
||||
|
||||
|
||||
class HeadingParser(HTMLParser):
    """Collect the headings (``<h1>``, ``<h2>``, ...) of an HTML document.

    After ``feed()`` has been called, ``headings`` holds one dictionary
    per closed heading, in document order, with the keys:

    - ``"level"``: the heading level as an integer (1 for ``<h1>``).
    - ``"id"``: the value of the heading's ``id`` attribute, or
      ``None`` if it has none.
    - ``"title"``: all text found inside the heading, including text
      nested in inline markup such as ``<em>``. Empty headings get an
      empty string.

    """
    # Applied with ``fullmatch`` so only complete tag names such as
    # "h1" count as headings.
    tag_re = re.compile(r"h(\d+)")

    def __init__(self, *args, **kwargs):
        self.headings = []
        # State of the heading that is currently open; ``_curr_level``
        # is None whenever the parser is outside of a heading.
        self._curr_level = None
        self._curr_id = None
        self._title_parts = []
        super().__init__(*args, **kwargs)

    def heading_level(self, tag):
        """Return the numeric level of a heading *tag*, else ``None``."""
        match = self.tag_re.fullmatch(tag)
        if match is None:
            return None
        return int(match.group(1))

    def handle_starttag(self, tag, attrs):
        this_level = self.heading_level(tag)
        if this_level is None:
            # Inline markup inside a heading must not disturb the state
            return
        # Found a heading, so start collecting its properties
        self._curr_level = this_level
        self._curr_id = dict(attrs).get("id")
        self._title_parts = []

    def handle_endtag(self, tag):
        this_level = self.heading_level(tag)
        if this_level is None or this_level != self._curr_level:
            return
        self.headings.append({
            "level": this_level,
            "id": self._curr_id,
            "title": "".join(self._title_parts),
        })
        # Leave the heading so that following body text is ignored and
        # a stray closing tag cannot record the same heading twice.
        self._curr_level = None
        self._curr_id = None
        self._title_parts = []

    def handle_data(self, data):
        # Text only matters while a heading is open. A heading may
        # deliver its text in several pieces when it has nested tags.
        if self._curr_level is not None:
            self._title_parts.append(data)
|
||||
|
||||
|
||||
def toc_from_headings(html: str, filename: str = "", chapter_title: str = "Sheet") -> list:
    """Accept a chapter of HTML, and extract a table of contents segment.

    The headings of the chapter are nested by their level: a heading
    that is followed by a deeper heading becomes a section holding
    its sub-headings, every other heading becomes a plain link.
    Skipped levels (e.g. an ``<h3>`` directly below an ``<h1>``) and
    chapters that do not start at ``<h1>`` are accepted.

    Parameters
    ----------
    html
      The HTML block to be parsed.
    filename
      The name of this file to be used for hrefs. E.g.
      "index.html#heading_1".
    chapter_title
      The title for the entry that represents the chapter as a whole.

    Returns
    -------
    toc
      If the chapter has no headings, a single ``epub.Link`` to the
      chapter. Otherwise a ``(epub.Section, children)`` tuple, where
      *children* is a list whose entries are ``epub.Link`` objects or
      further ``(epub.Section, children)`` tuples, e.g.::

        (Section, [(Section, [Link, Link]), Link])

      Note that despite the annotation the result is never a bare
      list.

    """
    # Parse the HTML
    parser = HeadingParser()
    parser.feed(html)
    headings = parser.headings
    if not headings:
        # No headings found, so just the chapter link
        return epub.Link(href=filename, title=chapter_title, uid=filename)
    # Add a section for the chapter as a whole
    toc = (epub.Section(href=filename, title=chapter_title), [])
    # Stack of (level, children) pairs for the branches that are still
    # open. The chapter itself sits at level 0 and is never removed, so
    # there is always a parent to attach the next heading to.
    open_branches = [(0, toc[1])]
    for idx, heading in enumerate(headings):
        level = heading["level"]
        # Close every branch that is not an ancestor of this heading
        while len(open_branches) > 1 and open_branches[-1][0] >= level:
            open_branches.pop()
        siblings = open_branches[-1][1]
        anchor = heading["id"]
        if anchor:
            href = f"{filename}#{anchor}"
            uid = href
        else:
            # A heading without an id has no anchor to jump to, so
            # point at the chapter instead of a bogus "#None" fragment.
            # The index keeps the uid distinct from other such entries.
            href = filename
            uid = f"{filename}#toc-{idx}"
        is_last = idx == len(headings) - 1
        is_leaf = is_last or level >= headings[idx + 1]["level"]
        # Add a leaf or branch depending on the heading structure
        if is_leaf:
            siblings.append(epub.Link(href=href, title=heading["title"], uid=uid))
        else:
            children = []
            siblings.append((epub.Section(href=href, title=heading["title"]), children))
            open_branches.append((level, children))
    return toc
|
||||
|
||||
|
||||
def html_parts(
|
||||
input_string,
|
||||
source_path=None,
|
||||
|
||||
@@ -37,3 +37,6 @@ div.system-message {
|
||||
border-width: 2px;
|
||||
color: red;
|
||||
}
|
||||
.literal {
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
<h1 id="gm-monsters">Monsters</h1>
|
||||
|
||||
[% for monster in monsters|sort(attribute='name') %]
|
||||
<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h1>
|
||||
<h2 id="gm-monsters-[[ monster.name|to_heading_id ]]">[[ monster.name ]]</h2>
|
||||
|
||||
[% if monster.description %]
|
||||
<h3>[[ monster.description ]]</h2>
|
||||
<h3>[[ monster.description ]]</h3>
|
||||
[% endif %]
|
||||
|
||||
<!-- Basic properties -->
|
||||
|
||||
@@ -279,7 +279,7 @@ def make_gm_sheet(
|
||||
gm_props.pop("sheet_type")
|
||||
if len(gm_props.keys()) > 0:
|
||||
msg = f"Unhandled attributes in '{str(gm_file)}': {','.join(gm_props.keys())}"
|
||||
log.warn(msg)
|
||||
log.warning(msg)
|
||||
warnings.warn(msg)
|
||||
# Produce the combined output depending on the format requested
|
||||
if output_format == "pdf":
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from ebooklib import epub
|
||||
|
||||
|
||||
from dungeonsheets.epub import toc_from_headings
|
||||
|
||||
|
||||
class TOCTestCase(TestCase):
    """Check the table-of-contents fragments built by ``toc_from_headings``."""

    def test_toc_from_no_headings(self):
        # Without any heading the chapter collapses to a single link
        html = '<p>Hello, world</p>'
        toc = toc_from_headings(html)
        self.assertIsInstance(toc, epub.Link)

    def test_toc_from_single_heading(self):
        html = '<h1 id="hello_world">Hello, world</h1>'
        toc = toc_from_headings(html)
        self.assertIsInstance(toc, tuple)
        self.assertIsInstance(toc[0], epub.Section)
        self.assertIsInstance(toc[1], list)
        # The lone heading must be the one and only entry of the chapter
        self.assertEqual(len(toc[1]), 1)
        self.assertIsInstance(toc[1][0], epub.Link)
        self.assertEqual(toc[1][0].title, "Hello, world")
        self.assertEqual(toc[1][0].href, "#hello_world")

    def test_toc_from_heading_tree(self):
        html = ('<h1 id="other_world">Other, world</h1>'
                '<h2 id="other_country">Other, country</h2>'
                '<h1 id="hello_world">Hello, world</h1>'
                '<h2 id="hello_country">Hello, country</h2>'
                '<h2 id="goodbye_country">Goodbye, country</h2>'
                '<h3 id="hello_city">Hello, city</h3>'
                '<h1 id="whatever">Whatever</h1>'
        )
        toc = toc_from_headings(html)
        heading_toc = toc[1]
        self.assertIsInstance(heading_toc, list)
        # One entry per top-level heading
        self.assertEqual(len(heading_toc), 3)
        self.assertIsInstance(heading_toc[0][0], epub.Section)
        self.assertEqual(heading_toc[0][0].title, "Other, world")
        self.assertIsInstance(heading_toc[2], epub.Link)
        self.assertEqual(heading_toc[2].title, "Whatever")
        self.assertIsInstance(heading_toc[1][1][0], epub.Link)
        self.assertEqual(heading_toc[1][1][0].title, "Hello, country")
        # The second <h2> has an <h3> below it, so it must be a branch
        goodbye_section, goodbye_children = heading_toc[1][1][1]
        self.assertIsInstance(goodbye_section, epub.Section)
        self.assertEqual(goodbye_section.title, "Goodbye, country")
        self.assertEqual(goodbye_children[0].title, "Hello, city")
|
||||
@@ -55,10 +55,10 @@ class MakeSheetsTestCase(unittest.TestCase):
|
||||
class EpubOutputTestCase(unittest.TestCase):
|
||||
gm_epub = Path(f"{GMFILE.stem}.epub").resolve()
|
||||
|
||||
# def tearDown(self):
|
||||
# for f in [self.gm_epub]:
|
||||
# if f.exists():
|
||||
# f.unlink()
|
||||
def tearDown(self):
|
||||
for f in [self.gm_epub]:
|
||||
if f.exists():
|
||||
f.unlink()
|
||||
|
||||
def test_file_created(self):
|
||||
# Check that a file is created once the function is run
|
||||
|
||||
Reference in New Issue
Block a user