From 8a054b335f9a57d1ac9a9b9aa34bd4be90c52fcf Mon Sep 17 00:00:00 2001 From: Mark Wolfman Date: Tue, 6 Jul 2021 23:48:36 -0500 Subject: [PATCH] Added table of contents to epub. --- dungeonsheets/epub.py | 109 ++++++++++++++++++++- dungeonsheets/forms/dungeonsheets_epub.css | 3 + dungeonsheets/forms/monsters_template.html | 4 +- dungeonsheets/make_sheets.py | 2 +- tests/test_epub.py | 40 ++++++++ tests/test_make_sheets.py | 8 +- 6 files changed, 156 insertions(+), 10 deletions(-) create mode 100644 tests/test_epub.py diff --git a/dungeonsheets/epub.py b/dungeonsheets/epub.py index a1c6217..7191ee4 100644 --- a/dungeonsheets/epub.py +++ b/dungeonsheets/epub.py @@ -1,4 +1,6 @@ from typing import Mapping +from html.parser import HTMLParser +import re from ebooklib import epub, ITEM_STYLE from docutils import core @@ -40,11 +42,13 @@ def create_epub( style = css_template.render(use_dnd_decorations=use_dnd_decorations) css = epub.EpubItem(uid="style_default", file_name="style/gm_sheet.css", media_type="text/css", content=style) - book.add_item(css) + book.add_item(css) + toc = ["nav"] # Create the separate chapters html_chapters = [] for chap_title, content in chapters.items(): - chap_fname = "{}.html".format(chap_title.replace(" ", "_").lower()) + chap_fname = chap_title.replace(" - ", "-").replace(" ", "_").lower() + chap_fname = "{}.html".format(chap_fname) chapter = epub.EpubHtml(title=chap_title, file_name=chap_fname, lang="en", media_type="application/xhtml+xml") @@ -52,8 +56,10 @@ def create_epub( chapter.add_item(css) book.add_item(chapter) html_chapters.append(chapter) + # Add entries for the table of contents + toc.append(toc_from_headings(html=content, filename=chap_fname, chapter_title=chap_title)) # Add the table of contents - book.toc = html_chapters + book.toc = toc book.spine = ("nav", *html_chapters) # add default NCX and Nav file book.add_item(epub.EpubNcx()) @@ -63,6 +69,103 @@ def create_epub( epub.write_epub(epub_fname, book) +class HeadingParser(HTMLParser): + tag_re = re.compile("h(\d+)") + _curr_level = None + _curr_id = None + _curr_title = None + + def __init__(self, *args, **kwargs): + self.headings = [] + super().__init__(*args, **kwargs) + + def heading_level(self, tag): + match = self.tag_re.match(tag) + if match: + return int(match.group(1)) + else: + return None + + def handle_starttag(self, tag, attrs): + this_level = self.heading_level(tag) + if this_level is not None: + # Found a heading, so process the properties + self._curr_level = this_level + attrs = {k: v for k, v in attrs} + self._curr_id = attrs.get('id') + + def handle_endtag(self, tag): + this_level = self.heading_level(tag) + if this_level is not None and this_level == self._curr_level: + heading = { + "level": this_level, + "id": self._curr_id, + "title": self._curr_title + } + self.headings.append(heading) + + def handle_data(self, data): + # Save the title + if self._curr_level is not None: + self._curr_title = data + + +def toc_from_headings(html: str, filename: str = "", chapter_title: str = "Sheet") -> list: + """Accept a chapter of HTML, and extract a table of contents segment. + + Parameters + ---------- + html + The HTML block to be parsed. + filename + The name of this file to be used for hrefs. E.g. + "index.html#heading_1". + + Returns + ------- + toc + A sequence of table-of-contents links. + + """ + # [(, + # [(, + # [, + # ])])] + # Parse the HTML + parser = HeadingParser() + parser.feed(html) + headings = parser.headings + # Parse into a table of contents + if len(headings) == 0: + # No headings found, so just the chapter link + toc = epub.Link(href=filename, title=chapter_title, uid=filename) + else: + # Add a section for the chapter as a whole + toc = (epub.Section(href=filename, title=chapter_title), []) + sections_stack = [toc] + # Parse all the headings + for idx, heading in enumerate(headings): + # Determine where we are in the tree + href = f"{filename}#{heading['id']}" + parent_section = sections_stack[-1] + is_last = idx == (len(headings) - 1) + is_leaf = is_last or heading['level'] >= headings[idx+1]['level'] + # Add a leaf or branch depending on the heading structure + if is_leaf: + parent_section[1].append(epub.Link(href=href, title=heading['title'], uid=href)) + else: + new_section = (epub.Section(href=href, title=heading['title']), + []) + parent_section[1].append(new_section) + sections_stack.append(new_section) + # Walk back up the stack + if not is_last: + for idx in range(max(0, heading['level'] - headings[idx + 1]['level'])): + sections_stack.pop() + + return toc + + def html_parts( input_string, source_path=None, diff --git a/dungeonsheets/forms/dungeonsheets_epub.css b/dungeonsheets/forms/dungeonsheets_epub.css index 08af226..2827fcc 100644 --- a/dungeonsheets/forms/dungeonsheets_epub.css +++ b/dungeonsheets/forms/dungeonsheets_epub.css @@ -37,3 +37,6 @@ div.system-message { border-width: 2px; color: red; } +.literal { + font-family: monospace; +} diff --git a/dungeonsheets/forms/monsters_template.html b/dungeonsheets/forms/monsters_template.html index fa59ca6..929afea 100644 --- a/dungeonsheets/forms/monsters_template.html +++ b/dungeonsheets/forms/monsters_template.html @@ -1,10 +1,10 @@

Monsters

[% for monster in monsters|sort(attribute='name') %] -

[[ monster.name ]]

+

[[ monster.name ]]

[% if monster.description %] -

[[ monster.description ]]

+

[[ monster.description ]]

[% endif %] diff --git a/dungeonsheets/make_sheets.py b/dungeonsheets/make_sheets.py index 49818a0..b3f8a4a 100755 --- a/dungeonsheets/make_sheets.py +++ b/dungeonsheets/make_sheets.py @@ -279,7 +279,7 @@ def make_gm_sheet( gm_props.pop("sheet_type") if len(gm_props.keys()) > 0: msg = f"Unhandled attributes in '{str(gm_file)}': {','.join(gm_props.keys())}" - log.warn(msg) + log.warning(msg) warnings.warn(msg) # Produce the combined output depending on the format requested if output_format == "pdf": diff --git a/tests/test_epub.py b/tests/test_epub.py new file mode 100644 index 0000000..2fa99c3 --- /dev/null +++ b/tests/test_epub.py @@ -0,0 +1,40 @@ +from unittest import TestCase + +from ebooklib import epub + + +from dungeonsheets.epub import toc_from_headings + + +class TOCTestCase(TestCase): + def test_toc_from_no_headings(self): + html = '

Hello, world

' + toc = toc_from_headings(html) + self.assertIsInstance(toc, epub.Link) + + def test_toc_from_single_heading(self): + html = '

Hello, world

' + toc = toc_from_headings(html) + self.assertIsInstance(toc, tuple) + self.assertIsInstance(toc[0], epub.Section) + self.assertIsInstance(toc[1], list) + + def test_toc_from_heading_tree(self): + html = ('

Other, world

' + '

Other, country

' + '

Hello, world

' + '

Hello, country

' + '

Goodbye, country

' + '

Hello, city

' + '

Whatever

' + ) + toc = toc_from_headings(html) + heading_toc = toc[1] + self.assertIsInstance(heading_toc, list) + self.assertIsInstance(heading_toc[0][0], epub.Section) + self.assertEqual(heading_toc[0][0].title, "Other, world") + self.assertIsInstance(heading_toc[2], epub.Link) + self.assertEqual(heading_toc[2].title, "Whatever") + self.assertIsInstance(heading_toc[2], epub.Link) + self.assertIsInstance(heading_toc[1][1][0], epub.Link) + self.assertEqual(heading_toc[1][1][0].title, "Hello, country") diff --git a/tests/test_make_sheets.py b/tests/test_make_sheets.py index 682de02..a3d8227 100644 --- a/tests/test_make_sheets.py +++ b/tests/test_make_sheets.py @@ -55,10 +55,10 @@ class MakeSheetsTestCase(unittest.TestCase): class EpubOutputTestCase(unittest.TestCase): gm_epub = Path(f"{GMFILE.stem}.epub").resolve() - # def tearDown(self): - # for f in [self.gm_epub]: - # if f.exists(): - # f.unlink() + def tearDown(self): + for f in [self.gm_epub]: + if f.exists(): + f.unlink() def test_file_created(self): # Check that a file is created once the function is run