mirror of
https://github.com/Threnklyn/dungeon-sheets.git
synced 2026-05-18 20:23:27 +02:00
274 lines
8.6 KiB
Python
274 lines
8.6 KiB
Python
from typing import Mapping
|
|
from html.parser import HTMLParser
|
|
import re
|
|
|
|
from ebooklib import epub, ITEM_STYLE
|
|
from docutils import core
|
|
from sphinx.util.docstrings import prepare_docstring
|
|
from docutils.writers.html5_polyglot import Writer as HTMLWriter
|
|
|
|
from dungeonsheets.forms import dice_re, jinja_environment
|
|
|
|
|
|
def create_epub(
    chapters: Mapping, title: str, basename: str, use_dnd_decorations: bool = False
):
    """Assemble an EPUB book from *chapters* and write it to disk.

    Parameters
    ==========
    chapters
      A mapping where the keys are chapter names (spines) and the
      values are strings of HTML used as the chapter contents.
    title
      The title set on the resulting book.
    basename
      The basename for saving files. The book is written to
      "{basename}.epub".
    use_dnd_decorations
      Forwarded to the CSS template; if true, style sheets will be
      included to produce D&D stylized stat blocks, etc.

    """
    book = epub.EpubBook()
    book.set_identifier("id123456")
    book.set_title(title)
    book.set_language("en")
    # Render the shared style sheet once and register it with the book
    rendered_css = jinja_env.get_template("dungeonsheets_epub.css").render(
        use_dnd_decorations=use_dnd_decorations
    )
    stylesheet = epub.EpubItem(
        uid="style_default",
        file_name="style/gm_sheet.css",
        media_type="text/css",
        content=rendered_css,
    )
    book.add_item(stylesheet)
    # One XHTML document plus one table-of-contents entry per chapter
    toc_entries = ["nav"]
    pages = []
    for name, body in chapters.items():
        slug = name.replace(" - ", "-").replace(" ", "_").lower()
        page_fname = f"{slug}.html"
        page = epub.EpubHtml(
            title=name,
            file_name=page_fname,
            lang="en",
            media_type="application/xhtml+xml",
        )
        page.set_content(body)
        page.add_item(stylesheet)
        book.add_item(page)
        pages.append(page)
        toc_entries.append(
            toc_from_headings(html=body, filename=page_fname, chapter_title=name)
        )
    # Navigation: table of contents, reading order, then the NCX/Nav files
    book.toc = toc_entries
    book.spine = tuple(["nav"] + pages)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    # Write the finished book next to the other output files
    epub.write_epub(f"{basename}.epub", book)
|
|
|
|
|
|
class HeadingParser(HTMLParser):
    """Collect the ``<hN>`` headings found in a block of HTML.

    After HTML has been fed to the parser, ``headings`` holds one
    dictionary per closed heading, in document order, with the keys:

    - ``"level"``: the integer N of the ``<hN>`` tag,
    - ``"id"``: the value of the tag's ``id`` attribute, or ``None``,
    - ``"title"``: the text inside the heading (including text inside
      nested inline tags), or ``None`` if the heading held no text.

    """

    # Raw string so that ``\d`` is a regex escape, not a string escape
    tag_re = re.compile(r"h(\d+)")
    # State of the heading currently being parsed (None when outside one)
    _curr_level = None
    _curr_id = None
    _curr_title = None

    def __init__(self, *args, **kwargs):
        self.headings = []
        super().__init__(*args, **kwargs)

    def heading_level(self, tag):
        """Return the heading level of *tag* (``"h2"`` -> 2), else ``None``."""
        # fullmatch so that only tags that are exactly "h<digits>" count
        match = self.tag_re.fullmatch(tag)
        if match is None:
            return None
        return int(match.group(1))

    def handle_starttag(self, tag, attrs):
        this_level = self.heading_level(tag)
        if this_level is not None:
            # Found a heading: start a fresh record for it
            self._curr_level = this_level
            self._curr_id = dict(attrs).get("id")
            self._curr_title = None

    def handle_endtag(self, tag):
        this_level = self.heading_level(tag)
        if this_level is not None and this_level == self._curr_level:
            self.headings.append(
                {
                    "level": this_level,
                    "id": self._curr_id,
                    "title": self._curr_title,
                }
            )
            # Leave the heading, so that text following it is not mistaken
            # for a title and a stray closing tag is not recorded twice
            self._curr_level = None
            self._curr_id = None
            self._curr_title = None

    def handle_data(self, data):
        # Only text inside an open heading contributes to its title
        if self._curr_level is None:
            return
        # Nested inline tags split the text into several chunks, so
        # accumulate them instead of keeping only the last one
        if self._curr_title is None:
            self._curr_title = data
        else:
            self._curr_title += data
|
|
|
|
|
|
def toc_from_headings(
    html: str, filename: str = "", chapter_title: str = "Sheet"
) -> list:
    """Accept a chapter of HTML, and extract a table of contents segment.

    The headings in *html* are nested by level: a heading that is
    directly followed by a deeper heading becomes a section, any other
    heading becomes a plain link inside the nearest enclosing section
    of lower level (or inside the chapter itself).

    Parameters
    ----------
    html
      The HTML block to be parsed.
    filename
      The name of this file to be used for hrefs. E.g.
      "index.html#heading_1".
    chapter_title
      The title used for the entry that represents the chapter as a
      whole.

    Returns
    -------
    toc
      Despite the ``list`` annotation, either a single ``epub.Link``
      (when *html* contains no headings) or a ``(epub.Section,
      children)`` tuple whose ``children`` list holds links and nested
      ``(section, children)`` tuples.

    """
    # Parse the HTML
    parser = HeadingParser()
    parser.feed(html)
    headings = parser.headings
    if not headings:
        # No headings found, so just the chapter link
        return epub.Link(href=filename, title=chapter_title, uid=filename)
    # Add a section for the chapter as a whole
    toc = (epub.Section(href=filename, title=chapter_title), [])
    # Stack of (heading level, section) for the sections that are still
    # open. The chapter root sits at level 0 and is never popped, so
    # skipped or out-of-order heading levels cannot empty the stack.
    open_sections = [(0, toc)]
    last_idx = len(headings) - 1
    for idx, heading in enumerate(headings):
        level = heading["level"]
        # Close every open section that is not an ancestor of this heading
        while len(open_sections) > 1 and open_sections[-1][0] >= level:
            open_sections.pop()
        parent_section = open_sections[-1][1]
        # Headings without an id cannot be targeted by a fragment, so
        # link to the file itself rather than to "#None"
        heading_id = heading["id"]
        href = filename if heading_id is None else f"{filename}#{heading_id}"
        # A heading is a branch only if the next heading is nested deeper
        is_leaf = idx == last_idx or level >= headings[idx + 1]["level"]
        if is_leaf:
            parent_section[1].append(
                epub.Link(href=href, title=heading["title"], uid=href)
            )
        else:
            new_section = (epub.Section(href=href, title=heading["title"]), [])
            parent_section[1].append(new_section)
            open_sections.append((level, new_section))
    return toc
|
|
|
|
|
|
def html_parts(
    input_string,
    source_path=None,
    destination_path=None,
    input_encoding="unicode",
    doctitle=True,
    initial_header_level=1,
):
    """Render a reST string with docutils and return the HTML parts.

    The input is first normalised with ``prepare_docstring`` and then
    published with the docutils HTML5 writer. The result of
    ``docutils.core.publish_parts`` is returned unchanged: a dictionary
    whose keys are the names of document parts and whose values are
    strings; encoding is up to the caller.

    Parameters
    ==========
    input_string
      A multi-line text string; required.
    source_path
      Path to the source file or object. Optional, but useful for
      diagnostic output (system messages).
    destination_path
      Path to the file or object which will receive the output;
      optional. Used for determining relative paths (stylesheets,
      source links, etc.).
    input_encoding
      The encoding of *input_string*. If it is an encoded 8-bit
      string, provide the correct encoding. If it is a Unicode
      string, use "unicode", the default.
    doctitle
      Forwarded as the ``doctitle_xform`` setting, which governs the
      promotion of a lone top-level section title to document title
      (and of a subsequent section title to document subtitle);
      enabled by default.
    initial_header_level
      The initial level for header elements (e.g. 1 for "<h1>").

    """
    # Strip docstring-style indentation before handing the text to docutils
    cleaned_source = "\n".join(prepare_docstring(input_string))
    settings = dict(
        input_encoding=input_encoding,
        doctitle_xform=doctitle,
        initial_header_level=initial_header_level,
    )
    # Convert the reST source to HTML parts
    return core.publish_parts(
        source=cleaned_source,
        source_path=source_path,
        destination_path=destination_path,
        writer=HTMLWriter(),
        settings_overrides=settings,
    )
|
|
|
|
|
|
def rst_to_html(rst, top_heading_level=0):
    """Convert a reST string into an HTML fragment.

    Text matched by ``dice_re`` is wrapped in double backticks (reST
    inline literal) before conversion, and the "body" part of the
    rendered document is returned.

    The *top_heading_level* parameter is intended to shift the
    translation between reST headings and HTML headings: a value of 0
    (default) would give "<h1>{Heading}</h1>" for a top-level heading,
    a value of 1 "<h2>{Heading}</h2>", etc.

    Note: heading translation is currently broken; the parameter is
    accepted but not used.

    Parameters
    ==========
    rst
      reStructured text input to be parsed, or ``None``.
    top_heading_level : optional
      The highest level heading that will be added to the HTML as
      described above.

    Returns
    =======
    html : str
      The reST text parsed into HTML markup; an empty string when
      *rst* is ``None``.

    """
    if rst is None:
        # Nothing to convert
        return ""
    # Mark hit dice in monospace font
    marked_up = dice_re.sub(r"``\1``", rst)
    return html_parts(marked_up)["body"]
|
|
|
|
|
|
def to_heading_id(inpt: str) -> str:
    """Return *inpt* with every space replaced by a hyphen.

    Used to turn a heading title into a string suitable for use as an
    HTML header id.

    """
    words = inpt.split(" ")
    return "-".join(words)
|
|
|
|
|
|
# Build the module-wide jinja environment and register the custom filters
jinja_env = jinja_environment()
jinja_env.filters.update(
    rst_to_html=rst_to_html,
    to_heading_id=to_heading_id,
)
|