diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..d24eb04
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,32 @@
+name: Python Lint with Black  # CI formatting gate for the ebooklib/ package
+
+on: [push, pull_request]  # triggered by every push and every pull request
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:  # one job is started per python-version entry below
+ python-version:
+ - "3.12"
+ - "3.11"
+ - "3.10"
+ - "3.9"
+ - "3.8"
+ - "3.7"  # Stated as the oldest version supported by GitHub Actions — NOTE(review): confirm it is still offered on ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: pip  # NOTE(review): pip caching is keyed on a dependency file (e.g. requirements.txt) — confirm the repo has one
+
+ - name: Install black
+ run: pipx install black  # NOTE(review): pipx may use its own default interpreter, not the matrix Python — confirm; pass --python if the matrix should matter
+
+ - name: Lint with Black
+ run: black --check ebooklib/  # --check rewrites nothing; the step fails if any file would be reformatted
\ No newline at end of file
diff --git a/ebooklib/__init__.py b/ebooklib/__init__.py
index 5f5abd0..956b500 100644
--- a/ebooklib/__init__.py
+++ b/ebooklib/__init__.py
@@ -33,14 +33,15 @@
ITEM_SMIL = 11
# EXTENSION MAPPER
-EXTENSIONS = {ITEM_IMAGE: ['.jpg', '.jpeg', '.gif', '.tiff', '.tif', '.png'],
- ITEM_STYLE: ['.css'],
- ITEM_VECTOR: ['.svg'],
- ITEM_FONT: ['.otf', '.woff', '.ttf'],
- ITEM_SCRIPT: ['.js'],
- ITEM_NAVIGATION: ['.ncx'],
- ITEM_VIDEO: ['.mov', '.mp4', '.avi'],
- ITEM_AUDIO: ['.mp3', '.ogg'],
- ITEM_COVER: ['.jpg', '.jpeg', '.png'],
- ITEM_SMIL: ['.smil']
- }
+EXTENSIONS = {
+ ITEM_IMAGE: [".jpg", ".jpeg", ".gif", ".tiff", ".tif", ".png"],
+ ITEM_STYLE: [".css"],
+ ITEM_VECTOR: [".svg"],
+ ITEM_FONT: [".otf", ".woff", ".ttf"],
+ ITEM_SCRIPT: [".js"],
+ ITEM_NAVIGATION: [".ncx"],
+ ITEM_VIDEO: [".mov", ".mp4", ".avi"],
+ ITEM_AUDIO: [".mp3", ".ogg"],
+ ITEM_COVER: [".jpg", ".jpeg", ".png"],
+ ITEM_SMIL: [".smil"],
+}
diff --git a/ebooklib/epub.py b/ebooklib/epub.py
index 62fea67..772b9c6 100644
--- a/ebooklib/epub.py
+++ b/ebooklib/epub.py
@@ -33,40 +33,54 @@
import ebooklib
-from ebooklib.utils import parse_string, parse_html_string, guess_type, get_pages_for_items
+from ebooklib.utils import (
+ parse_string,
+ parse_html_string,
+ guess_type,
+ get_pages_for_items,
+)
# Version of EPUB library
VERSION = (0, 18, 1)
-NAMESPACES = {'XML': 'http://www.w3.org/XML/1998/namespace',
- 'EPUB': 'http://www.idpf.org/2007/ops',
- 'DAISY': 'http://www.daisy.org/z3986/2005/ncx/',
- 'OPF': 'http://www.idpf.org/2007/opf',
- 'CONTAINERNS': 'urn:oasis:names:tc:opendocument:xmlns:container',
- 'DC': 'http://purl.org/dc/elements/1.1/',
- 'XHTML': 'http://www.w3.org/1999/xhtml'}
+NAMESPACES = {
+ "XML": "http://www.w3.org/XML/1998/namespace",
+ "EPUB": "http://www.idpf.org/2007/ops",
+ "DAISY": "http://www.daisy.org/z3986/2005/ncx/",
+ "OPF": "http://www.idpf.org/2007/opf",
+ "CONTAINERNS": "urn:oasis:names:tc:opendocument:xmlns:container",
+ "DC": "http://purl.org/dc/elements/1.1/",
+ "XHTML": "http://www.w3.org/1999/xhtml",
+}
# XML Templates
-CONTAINER_PATH = 'META-INF/container.xml'
+CONTAINER_PATH = "META-INF/container.xml"
-CONTAINER_XML = '''
+CONTAINER_XML = """
-'''
+"""
-NCX_XML = six.b('''
-''')
+NCX_XML = six.b(
+ """
+"""
+)
-NAV_XML = six.b('''
''')
+NAV_XML = six.b(
+ """
-''')
+"""
+)
-IMAGE_MEDIA_TYPES = ['image/jpeg', 'image/jpg', 'image/png', 'image/svg+xml']
+IMAGE_MEDIA_TYPES = ["image/jpeg", "image/jpg", "image/png", "image/svg+xml"]
# TOC and navigation elements
+
class Section(object):
- def __init__(self, title, href=''):
+ def __init__(self, title, href=""):
self.title = title
self.href = href
@@ -100,6 +116,7 @@ def __init__(self, href, title, uid=None):
self.title = title
self.uid = uid
+
# Exceptions
@@ -112,16 +129,18 @@ def __init__(self, code, msg):
def __str__(self):
return repr(self.msg)
+
# Items
class EpubItem(object):
-
"""
Base class for the items in a book.
"""
- def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), manifest=True):
+ def __init__(
+ self, uid=None, file_name="", media_type="", content=six.b(""), manifest=True
+ ):
"""
:Args:
- uid: Unique identifier for this item (optional)
@@ -188,7 +207,7 @@ def get_type(self):
return ebooklib.ITEM_UNKNOWN
- def get_content(self, default=six.b('')):
+ def get_content(self, default=six.b("")):
"""
Returns content of the item. Content should be of type 'str' (Python 2) or 'bytes' (Python 3)
@@ -210,45 +229,56 @@ def set_content(self, content):
self.content = content
def __str__(self):
- return '' % self.id
+ return "" % self.id
class EpubNcx(EpubItem):
"Represents Navigation Control File (NCX) in the EPUB."
- def __init__(self, uid='ncx', file_name='toc.ncx'):
- super(EpubNcx, self).__init__(uid=uid, file_name=file_name, media_type='application/x-dtbncx+xml')
+ def __init__(self, uid="ncx", file_name="toc.ncx"):
+ super(EpubNcx, self).__init__(
+ uid=uid, file_name=file_name, media_type="application/x-dtbncx+xml"
+ )
def __str__(self):
- return '' % self.id
+ return "" % self.id
class EpubCover(EpubItem):
-
"""
Represents Cover image in the EPUB file.
"""
- def __init__(self, uid='cover-img', file_name=''):
+ def __init__(self, uid="cover-img", file_name=""):
super(EpubCover, self).__init__(uid=uid, file_name=file_name)
def get_type(self):
return ebooklib.ITEM_COVER
def __str__(self):
- return '' % (self.id, self.file_name)
+ return "" % (self.id, self.file_name)
class EpubHtml(EpubItem):
-
"""
Represents HTML document in the EPUB file.
"""
- _template_name = 'chapter'
- def __init__(self, uid=None, file_name='', media_type='', content=None, title='',
- lang=None, direction=None, media_overlay=None, media_duration=None):
+ _template_name = "chapter"
+
+ def __init__(
+ self,
+ uid=None,
+ file_name="",
+ media_type="",
+ content=None,
+ title="",
+ lang=None,
+ direction=None,
+ media_overlay=None,
+ media_duration=None,
+ ):
super(EpubHtml, self).__init__(uid, file_name, media_type, content)
self.title = title
@@ -305,9 +335,9 @@ def add_link(self, **kwgs):
>>> add_link(href='styles.css', rel='stylesheet', type='text/css')
"""
self.links.append(kwgs)
- if kwgs.get('type') == 'text/javascript':
- if 'scripted' not in self.properties:
- self.properties.append('scripted')
+ if kwgs.get("type") == "text/javascript":
+ if "scripted" not in self.properties:
+ self.properties.append("scripted")
def get_links(self):
"""
@@ -325,7 +355,7 @@ def get_links_of_type(self, link_type):
:Returns:
As tuple returns list of links.
"""
- return (link for link in self.links if link.get('type', '') == link_type)
+ return (link for link in self.links if link.get("type", "") == link_type)
def add_item(self, item):
"""
@@ -335,10 +365,10 @@ def add_item(self, item):
- item: item we want to add defined as instance of EpubItem
"""
if item.get_type() == ebooklib.ITEM_STYLE:
- self.add_link(href=item.get_name(), rel='stylesheet', type='text/css')
+ self.add_link(href=item.get_name(), rel="stylesheet", type="text/css")
if item.get_type() == ebooklib.ITEM_SCRIPT:
- self.add_link(src=item.get_name(), type='text/javascript')
+ self.add_link(src=item.get_name(), type="text/javascript")
def get_body_content(self):
"""
@@ -352,24 +382,26 @@ def get_body_content(self):
try:
html_tree = parse_html_string(self.content)
except:
- return ''
+ return ""
html_root = html_tree.getroottree()
- if len(html_root.find('body')) != 0:
- body = html_tree.find('body')
+ if len(html_root.find("body")) != 0:
+ body = html_tree.find("body")
- tree_str = etree.tostring(body, pretty_print=True, encoding='utf-8', xml_declaration=False)
+ tree_str = etree.tostring(
+ body, pretty_print=True, encoding="utf-8", xml_declaration=False
+ )
# this is so stupid
- if tree_str.startswith(six.b('')):
- n = tree_str.rindex(six.b(''))
+ if tree_str.startswith(six.b("")):
+ n = tree_str.rindex(six.b(""))
return tree_str[6:n]
return tree_str
- return ''
+ return ""
def get_content(self, default=None):
"""
@@ -386,8 +418,10 @@ def get_content(self, default=None):
tree = parse_string(self.book.get_template(self._template_name))
tree_root = tree.getroot()
- tree_root.set('lang', self.lang or self.book.language)
- tree_root.attrib['{%s}lang' % NAMESPACES['XML']] = self.lang or self.book.language
+ tree_root.set("lang", self.lang or self.book.language)
+ tree_root.attrib["{%s}lang" % NAMESPACES["XML"]] = (
+ self.lang or self.book.language
+ )
# add to the head also
#
@@ -395,25 +429,25 @@ def get_content(self, default=None):
try:
html_tree = parse_html_string(self.content)
except:
- return ''
+ return ""
html_root = html_tree.getroottree()
# create and populate head
- _head = etree.SubElement(tree_root, 'head')
+ _head = etree.SubElement(tree_root, "head")
- if self.title != '':
- _title = etree.SubElement(_head, 'title')
+ if self.title != "":
+ _title = etree.SubElement(_head, "title")
_title.text = self.title
for lnk in self.links:
- if lnk.get('type') == 'text/javascript':
- _lnk = etree.SubElement(_head, 'script', lnk)
+ if lnk.get("type") == "text/javascript":
+ _lnk = etree.SubElement(_head, "script", lnk)
# force
- _lnk.text = ''
+ _lnk.text = ""
else:
- _lnk = etree.SubElement(_head, 'link', lnk)
+ _lnk = etree.SubElement(_head, "link", lnk)
# this should not be like this
# head = html_root.find('head')
@@ -425,31 +459,34 @@ def get_content(self, default=None):
# create and populate body
- _body = etree.SubElement(tree_root, 'body')
+ _body = etree.SubElement(tree_root, "body")
if self.direction:
- _body.set('dir', self.direction)
- tree_root.set('dir', self.direction)
+ _body.set("dir", self.direction)
+ tree_root.set("dir", self.direction)
- body = html_tree.find('body')
+ body = html_tree.find("body")
if body is not None:
for i in body.getchildren():
_body.append(i)
- tree_str = etree.tostring(tree, pretty_print=True, encoding='utf-8', xml_declaration=True)
+ tree_str = etree.tostring(
+ tree, pretty_print=True, encoding="utf-8", xml_declaration=True
+ )
return tree_str
def __str__(self):
- return '' % (self.id, self.file_name)
+ return "" % (self.id, self.file_name)
class EpubCoverHtml(EpubHtml):
-
"""
Represents Cover page in the EPUB file.
"""
- def __init__(self, uid='cover', file_name='cover.xhtml', image_name='', title='Cover'):
+ def __init__(
+ self, uid="cover", file_name="cover.xhtml", image_name="", title="Cover"
+ ):
super(EpubCoverHtml, self).__init__(uid=uid, file_name=file_name, title=title)
self.image_name = image_name
@@ -473,32 +510,48 @@ def get_content(self):
Returns content of this document.
"""
- self.content = self.book.get_template('cover')
+ self.content = self.book.get_template("cover")
tree = parse_string(super(EpubCoverHtml, self).get_content())
tree_root = tree.getroot()
- images = tree_root.xpath('//xhtml:img', namespaces={'xhtml': NAMESPACES['XHTML']})
+ images = tree_root.xpath(
+ "//xhtml:img", namespaces={"xhtml": NAMESPACES["XHTML"]}
+ )
- images[0].set('src', self.image_name)
- images[0].set('alt', self.title)
+ images[0].set("src", self.image_name)
+ images[0].set("alt", self.title)
- tree_str = etree.tostring(tree, pretty_print=True, encoding='utf-8', xml_declaration=True)
+ tree_str = etree.tostring(
+ tree, pretty_print=True, encoding="utf-8", xml_declaration=True
+ )
return tree_str
def __str__(self):
- return '' % (self.id, self.file_name)
+ return "" % (self.id, self.file_name)
class EpubNav(EpubHtml):
-
"""
Represents Navigation Document in the EPUB file.
"""
- def __init__(self, uid='nav', file_name='nav.xhtml', media_type='application/xhtml+xml', title='', direction=None):
- super(EpubNav, self).__init__(uid=uid, file_name=file_name, media_type=media_type, title=title, direction=direction)
+ def __init__(
+ self,
+ uid="nav",
+ file_name="nav.xhtml",
+ media_type="application/xhtml+xml",
+ title="",
+ direction=None,
+ ):
+ super(EpubNav, self).__init__(
+ uid=uid,
+ file_name=file_name,
+ media_type=media_type,
+ title=title,
+ direction=direction,
+ )
def is_chapter(self):
"""
@@ -511,11 +564,10 @@ def is_chapter(self):
return False
def __str__(self):
- return '' % (self.id, self.file_name)
+ return "" % (self.id, self.file_name)
class EpubImage(EpubItem):
-
"""
Represents Image in the EPUB file.
"""
@@ -527,23 +579,29 @@ def get_type(self):
return ebooklib.ITEM_IMAGE
def __str__(self):
- return '' % (self.id, self.file_name)
+ return "" % (self.id, self.file_name)
class EpubSMIL(EpubItem):
- def __init__(self, uid=None, file_name='', content=None):
- super(EpubSMIL, self).__init__(uid=uid, file_name=file_name, media_type='application/smil+xml', content=content)
+ def __init__(self, uid=None, file_name="", content=None):
+ super(EpubSMIL, self).__init__(
+ uid=uid,
+ file_name=file_name,
+ media_type="application/smil+xml",
+ content=content,
+ )
def get_type(self):
return ebooklib.ITEM_SMIL
def __str__(self):
- return '' % (self.id, self.file_name)
+ return "" % (self.id, self.file_name)
# EpubBook
+
class EpubBook(object):
def __init__(self):
@@ -564,27 +622,33 @@ def reset(self):
self.toc = []
self.bindings = []
- self.IDENTIFIER_ID = 'id'
- self.FOLDER_NAME = 'EPUB'
+ self.IDENTIFIER_ID = "id"
+ self.FOLDER_NAME = "EPUB"
self._id_html = 0
self._id_image = 0
self._id_static = 0
- self.title = ''
- self.language = 'en'
+ self.title = ""
+ self.language = "en"
self.direction = None
self.templates = {
- 'ncx': NCX_XML,
- 'nav': NAV_XML,
- 'chapter': CHAPTER_XML,
- 'cover': COVER_XML
+ "ncx": NCX_XML,
+ "nav": NAV_XML,
+ "chapter": CHAPTER_XML,
+ "cover": COVER_XML,
}
- self.add_metadata('OPF', 'generator', '', {
- 'name': 'generator', 'content': 'Ebook-lib %s' % '.'.join([str(s) for s in VERSION])
- })
+ self.add_metadata(
+ "OPF",
+ "generator",
+ "",
+ {
+ "name": "generator",
+ "content": "Ebook-lib %s" % ".".join([str(s) for s in VERSION]),
+ },
+ )
# default to using a randomly-unique identifier if one is not specified manually
self.set_identifier(str(uuid.uuid4()))
@@ -603,7 +667,9 @@ def set_identifier(self, uid):
self.uid = uid
- self.set_unique_metadata('DC', 'identifier', self.uid, {'id': self.IDENTIFIER_ID})
+ self.set_unique_metadata(
+ "DC", "identifier", self.uid, {"id": self.IDENTIFIER_ID}
+ )
def set_title(self, title):
"""
@@ -615,7 +681,7 @@ def set_title(self, title):
self.title = title
- self.add_metadata('DC', 'title', self.title)
+ self.add_metadata("DC", "title", self.title)
def set_language(self, lang):
"""
@@ -628,7 +694,7 @@ def set_language(self, lang):
self.language = lang
- self.add_metadata('DC', 'language', lang)
+ self.add_metadata("DC", "language", lang)
def set_direction(self, direction):
"""
@@ -657,21 +723,33 @@ def set_cover(self, file_name, content, create_page=True):
c1 = EpubCoverHtml(image_name=file_name)
self.add_item(c1)
- self.add_metadata(None, 'meta', '', OrderedDict([('name', 'cover'), ('content', 'cover-img')]))
+ self.add_metadata(
+ None, "meta", "", OrderedDict([("name", "cover"), ("content", "cover-img")])
+ )
- def add_author(self, author, file_as=None, role=None, uid='creator'):
+ def add_author(self, author, file_as=None, role=None, uid="creator"):
"Add author for this document"
- self.add_metadata('DC', 'creator', author, {'id': uid})
+ self.add_metadata("DC", "creator", author, {"id": uid})
if file_as:
- self.add_metadata(None, 'meta', file_as, {'refines': '#' + uid,
- 'property': 'file-as',
- 'scheme': 'marc:relators'})
+ self.add_metadata(
+ None,
+ "meta",
+ file_as,
+ {
+ "refines": "#" + uid,
+ "property": "file-as",
+ "scheme": "marc:relators",
+ },
+ )
if role:
- self.add_metadata(None, 'meta', role, {'refines': '#' + uid,
- 'property': 'role',
- 'scheme': 'marc:relators'})
+ self.add_metadata(
+ None,
+ "meta",
+ role,
+ {"refines": "#" + uid, "property": "role", "scheme": "marc:relators"},
+ )
def add_metadata(self, namespace, name, value, others=None):
"Add metadata"
@@ -714,7 +792,7 @@ def add_item(self, item):
:Args:
- item: Item instance
"""
- if item.media_type == '':
+ if item.media_type == "":
(has_guessed, media_type) = guess_type(item.get_name().lower())
if has_guessed:
@@ -723,20 +801,20 @@ def add_item(self, item):
else:
item.media_type = has_guessed
else:
- item.media_type = 'application/octet-stream'
+ item.media_type = "application/octet-stream"
if not item.get_id():
# make chapter_, image_ and static_ configurable
if isinstance(item, EpubHtml):
- item.id = 'chapter_%d' % self._id_html
+ item.id = "chapter_%d" % self._id_html
self._id_html += 1
# If there's a page list, append it to the book's page list
self.pages += item.pages
elif isinstance(item, EpubImage):
- item.id = 'image_%d' % self._id_image
+ item.id = "image_%d" % self._id_image
self._id_image += 1
else:
- item.id = 'static_%d' % self._id_static
+ item.id = "static_%d" % self._id_static
self._id_static += 1
item.book = self
@@ -856,24 +934,21 @@ def add_prefix(self, name, uri):
- uri: URI for the namespace
"""
- self.prefixes.append('%s: %s' % (name, uri))
+ self.prefixes.append("%s: %s" % (name, uri))
class EpubWriter(object):
DEFAULT_OPTIONS = {
- 'epub2_guide': True,
- 'epub3_landmark': True,
- 'epub3_pages': True,
- 'landmark_title': 'Guide',
- 'pages_title': 'Pages',
- 'spine_direction': True,
- 'package_direction': False,
- 'play_order': {
- 'enabled': False,
- 'start_from': 1
- },
- 'raise_exceptions': False,
- 'compresslevel': 6
+ "epub2_guide": True,
+ "epub3_landmark": True,
+ "epub3_pages": True,
+ "landmark_title": "Guide",
+ "pages_title": "Pages",
+ "spine_direction": True,
+ "package_direction": False,
+ "play_order": {"enabled": False, "start_from": 1},
+ "raise_exceptions": False,
+ "compresslevel": 6,
}
def __init__(self, name, book, options=None):
@@ -887,31 +962,28 @@ def __init__(self, name, book, options=None):
self._init_play_order()
def _init_play_order(self):
- self._play_order = {
- 'enabled': False,
- 'start_from': 1
- }
+ self._play_order = {"enabled": False, "start_from": 1}
try:
- self._play_order['enabled'] = self.options['play_order']['enabled']
- self._play_order['start_from'] = self.options['play_order']['start_from']
+ self._play_order["enabled"] = self.options["play_order"]["enabled"]
+ self._play_order["start_from"] = self.options["play_order"]["start_from"]
except KeyError:
pass
def process(self):
# should cache this html parsing so we don't do it for every plugin
- for plg in self.options.get('plugins', []):
- if hasattr(plg, 'before_write'):
+ for plg in self.options.get("plugins", []):
+ if hasattr(plg, "before_write"):
plg.before_write(self.book)
for item in self.book.get_items():
if isinstance(item, EpubHtml):
- for plg in self.options.get('plugins', []):
- if hasattr(plg, 'html_before_write'):
+ for plg in self.options.get("plugins", []):
+ if hasattr(plg, "html_before_write"):
plg.html_before_write(self.book, item)
def _write_container(self):
- container_xml = CONTAINER_XML % {'folder_name': self.book.FOLDER_NAME}
+ container_xml = CONTAINER_XML % {"folder_name": self.book.FOLDER_NAME}
self.out.writestr(CONTAINER_PATH, container_xml)
def _write_opf_metadata(self, root):
@@ -923,46 +995,52 @@ def _write_opf_metadata(self, root):
# if ns_name == ns_url:
# nsmap[n_id.lower()] = NAMESPACES[n_id]
- nsmap = {'dc': NAMESPACES['DC'], 'opf': NAMESPACES['OPF']}
+ nsmap = {"dc": NAMESPACES["DC"], "opf": NAMESPACES["OPF"]}
nsmap.update(self.book.namespaces)
- metadata = etree.SubElement(root, 'metadata', nsmap=nsmap)
+ metadata = etree.SubElement(root, "metadata", nsmap=nsmap)
- el = etree.SubElement(metadata, 'meta', {'property': 'dcterms:modified'})
- if 'mtime' in self.options:
- mtime = self.options['mtime']
+ el = etree.SubElement(metadata, "meta", {"property": "dcterms:modified"})
+ if "mtime" in self.options:
+ mtime = self.options["mtime"]
else:
import datetime
+
mtime = datetime.datetime.now()
- el.text = mtime.strftime('%Y-%m-%dT%H:%M:%SZ')
+ el.text = mtime.strftime("%Y-%m-%dT%H:%M:%SZ")
for ns_name, values in six.iteritems(self.book.metadata):
- if ns_name == NAMESPACES['OPF']:
+ if ns_name == NAMESPACES["OPF"]:
for values in values.values():
for v in values:
- if 'property' in v[1] and v[1]['property'] == 'dcterms:modified':
+ if (
+ "property" in v[1]
+ and v[1]["property"] == "dcterms:modified"
+ ):
continue
try:
- el = etree.SubElement(metadata, 'meta', v[1])
+ el = etree.SubElement(metadata, "meta", v[1])
if v[0]:
el.text = v[0]
except ValueError:
- logging.error('Could not create metadata.')
+ logging.error("Could not create metadata.")
else:
for name, values in six.iteritems(values):
for v in values:
try:
if ns_name:
- el = etree.SubElement(metadata, '{%s}%s' % (ns_name, name), v[1])
+ el = etree.SubElement(
+ metadata, "{%s}%s" % (ns_name, name), v[1]
+ )
else:
- el = etree.SubElement(metadata, '%s' % name, v[1])
+ el = etree.SubElement(metadata, "%s" % name, v[1])
el.text = v[0]
except ValueError:
logging.info('Could not create metadata "{}".'.format(name))
def _write_opf_manifest(self, root):
- manifest = etree.SubElement(root, 'manifest')
+ manifest = etree.SubElement(root, "manifest")
_ncx_id = None
# mathml, scripted, svg, remote-resources, and switch
@@ -974,45 +1052,65 @@ def _write_opf_manifest(self, root):
continue
if isinstance(item, EpubNav):
- etree.SubElement(manifest, 'item', {'href': item.get_name(),
- 'id': item.id,
- 'media-type': item.media_type,
- 'properties': 'nav'})
+ etree.SubElement(
+ manifest,
+ "item",
+ {
+ "href": item.get_name(),
+ "id": item.id,
+ "media-type": item.media_type,
+ "properties": "nav",
+ },
+ )
elif isinstance(item, EpubNcx):
_ncx_id = item.id
- etree.SubElement(manifest, 'item', {'href': item.file_name,
- 'id': item.id,
- 'media-type': item.media_type})
+ etree.SubElement(
+ manifest,
+ "item",
+ {
+ "href": item.file_name,
+ "id": item.id,
+ "media-type": item.media_type,
+ },
+ )
elif isinstance(item, EpubCover):
- etree.SubElement(manifest, 'item', {'href': item.file_name,
- 'id': item.id,
- 'media-type': item.media_type,
- 'properties': 'cover-image'})
+ etree.SubElement(
+ manifest,
+ "item",
+ {
+ "href": item.file_name,
+ "id": item.id,
+ "media-type": item.media_type,
+ "properties": "cover-image",
+ },
+ )
else:
- opts = {'href': item.file_name,
- 'id': item.id,
- 'media-type': item.media_type}
+ opts = {
+ "href": item.file_name,
+ "id": item.id,
+ "media-type": item.media_type,
+ }
- if hasattr(item, 'properties') and len(item.properties) > 0:
- opts['properties'] = ' '.join(item.properties)
+ if hasattr(item, "properties") and len(item.properties) > 0:
+ opts["properties"] = " ".join(item.properties)
- if hasattr(item, 'media_overlay') and item.media_overlay is not None:
- opts['media-overlay'] = item.media_overlay
+ if hasattr(item, "media_overlay") and item.media_overlay is not None:
+ opts["media-overlay"] = item.media_overlay
- if hasattr(item, 'media_duration') and item.media_duration is not None:
- opts['duration'] = item.media_duration
+ if hasattr(item, "media_duration") and item.media_duration is not None:
+ opts["duration"] = item.media_duration
- etree.SubElement(manifest, 'item', opts)
+ etree.SubElement(manifest, "item", opts)
return _ncx_id
def _write_opf_spine(self, root, ncx_id):
- spine_attributes = {'toc': ncx_id or 'ncx'}
- if self.book.direction and self.options['spine_direction']:
- spine_attributes['page-progression-direction'] = self.book.direction
+ spine_attributes = {"toc": ncx_id or "ncx"}
+ if self.book.direction and self.options["spine_direction"]:
+ spine_attributes["page-progression-direction"] = self.book.direction
- spine = etree.SubElement(root, 'spine', spine_attributes)
+ spine = etree.SubElement(root, "spine", spine_attributes)
for _item in self.book.spine:
# this is for now
@@ -1024,78 +1122,86 @@ def _write_opf_spine(self, root, ncx_id):
item = _item[0]
if len(_item) > 1:
- if _item[1] == 'no':
+ if _item[1] == "no":
is_linear = False
else:
item = _item
if isinstance(item, EpubHtml):
- opts = {'idref': item.get_id()}
+ opts = {"idref": item.get_id()}
if not item.is_linear or not is_linear:
- opts['linear'] = 'no'
+ opts["linear"] = "no"
elif isinstance(item, EpubItem):
- opts = {'idref': item.get_id()}
+ opts = {"idref": item.get_id()}
if not item.is_linear or not is_linear:
- opts['linear'] = 'no'
+ opts["linear"] = "no"
else:
- opts = {'idref': item}
+ opts = {"idref": item}
try:
itm = self.book.get_item_with_id(item)
if not itm.is_linear or not is_linear:
- opts['linear'] = 'no'
+ opts["linear"] = "no"
except:
pass
- etree.SubElement(spine, 'itemref', opts)
+ etree.SubElement(spine, "itemref", opts)
def _write_opf_guide(self, root):
# - http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.6
- if len(self.book.guide) > 0 and self.options.get('epub2_guide'):
- guide = etree.SubElement(root, 'guide', {})
+ if len(self.book.guide) > 0 and self.options.get("epub2_guide"):
+ guide = etree.SubElement(root, "guide", {})
for item in self.book.guide:
- if 'item' in item:
- chap = item.get('item')
+ if "item" in item:
+ chap = item.get("item")
if chap:
_href = chap.file_name
_title = chap.title
else:
- _href = item.get('href', '')
- _title = item.get('title', '')
+ _href = item.get("href", "")
+ _title = item.get("title", "")
if _title is None:
- _title = ''
- ref = etree.SubElement(guide, 'reference', {'type': item.get('type', ''),
- 'title': _title,
- 'href': _href})
+ _title = ""
+ ref = etree.SubElement(
+ guide,
+ "reference",
+ {"type": item.get("type", ""), "title": _title, "href": _href},
+ )
def _write_opf_bindings(self, root):
if len(self.book.bindings) > 0:
- bindings = etree.SubElement(root, 'bindings', {})
+ bindings = etree.SubElement(root, "bindings", {})
for item in self.book.bindings:
- etree.SubElement(bindings, 'mediaType', item)
+ etree.SubElement(bindings, "mediaType", item)
def _write_opf_file(self, root):
- tree_str = etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True)
+ tree_str = etree.tostring(
+ root, pretty_print=True, encoding="utf-8", xml_declaration=True
+ )
- self.out.writestr('%s/content.opf' % self.book.FOLDER_NAME, tree_str)
+ self.out.writestr("%s/content.opf" % self.book.FOLDER_NAME, tree_str)
def _write_opf(self):
- package_attributes = {'xmlns': NAMESPACES['OPF'],
- 'unique-identifier': self.book.IDENTIFIER_ID,
- 'version': '3.0'}
- if self.book.direction and self.options['package_direction']:
- package_attributes['dir'] = self.book.direction
+ package_attributes = {
+ "xmlns": NAMESPACES["OPF"],
+ "unique-identifier": self.book.IDENTIFIER_ID,
+ "version": "3.0",
+ }
+ if self.book.direction and self.options["package_direction"]:
+ package_attributes["dir"] = self.book.direction
- root = etree.Element('package', package_attributes)
+ root = etree.Element("package", package_attributes)
- prefixes = ['rendition: http://www.idpf.org/vocab/rendition/#'] + self.book.prefixes
- root.attrib['prefix'] = ' '.join(prefixes)
+ prefixes = [
+ "rendition: http://www.idpf.org/vocab/rendition/#"
+ ] + self.book.prefixes
+ root.attrib["prefix"] = " ".join(prefixes)
# METADATA
self._write_opf_metadata(root)
@@ -1117,60 +1223,88 @@ def _write_opf(self):
def _get_nav(self, item):
# just a basic navigation for now
- nav_xml = parse_string(self.book.get_template('nav'))
+ nav_xml = parse_string(self.book.get_template("nav"))
root = nav_xml.getroot()
- root.set('lang', self.book.language)
- root.attrib['{%s}lang' % NAMESPACES['XML']] = self.book.language
+ root.set("lang", self.book.language)
+ root.attrib["{%s}lang" % NAMESPACES["XML"]] = self.book.language
nav_dir_name = os.path.dirname(item.file_name)
- head = etree.SubElement(root, 'head')
- title = etree.SubElement(head, 'title')
+ head = etree.SubElement(root, "head")
+ title = etree.SubElement(head, "title")
title.text = item.title or self.book.title
# for now this just handles css files and ignores others
for _link in item.links:
- _lnk = etree.SubElement(head, 'link', {
- 'href': _link.get('href', ''), 'rel': 'stylesheet', 'type': 'text/css'
- })
+ _lnk = etree.SubElement(
+ head,
+ "link",
+ {
+ "href": _link.get("href", ""),
+ "rel": "stylesheet",
+ "type": "text/css",
+ },
+ )
- body = etree.SubElement(root, 'body')
+ body = etree.SubElement(root, "body")
if item.direction:
- body.set('dir', item.direction)
- nav = etree.SubElement(body, 'nav', {
- '{%s}type' % NAMESPACES['EPUB']: 'toc',
- 'id': 'id',
- 'role': 'doc-toc',
- })
-
- content_title = etree.SubElement(nav, 'h2')
+ body.set("dir", item.direction)
+ nav = etree.SubElement(
+ body,
+ "nav",
+ {
+ "{%s}type" % NAMESPACES["EPUB"]: "toc",
+ "id": "id",
+ "role": "doc-toc",
+ },
+ )
+
+ content_title = etree.SubElement(nav, "h2")
content_title.text = item.title or self.book.title
def _create_section(itm, items):
- ol = etree.SubElement(itm, 'ol')
+ ol = etree.SubElement(itm, "ol")
for item in items:
if isinstance(item, tuple) or isinstance(item, list):
- li = etree.SubElement(ol, 'li')
+ li = etree.SubElement(ol, "li")
if isinstance(item[0], EpubHtml):
- a = etree.SubElement(li, 'a', {'href': zip_path.relpath(item[0].file_name, nav_dir_name)})
- elif isinstance(item[0], Section) and item[0].href != '':
- a = etree.SubElement(li, 'a', {'href': zip_path.relpath(item[0].href, nav_dir_name)})
+ a = etree.SubElement(
+ li,
+ "a",
+ {"href": zip_path.relpath(item[0].file_name, nav_dir_name)},
+ )
+ elif isinstance(item[0], Section) and item[0].href != "":
+ a = etree.SubElement(
+ li,
+ "a",
+ {"href": zip_path.relpath(item[0].href, nav_dir_name)},
+ )
elif isinstance(item[0], Link):
- a = etree.SubElement(li, 'a', {'href': zip_path.relpath(item[0].href, nav_dir_name)})
+ a = etree.SubElement(
+ li,
+ "a",
+ {"href": zip_path.relpath(item[0].href, nav_dir_name)},
+ )
else:
- a = etree.SubElement(li, 'span')
+ a = etree.SubElement(li, "span")
a.text = item[0].title
_create_section(li, item[1])
elif isinstance(item, Link):
- li = etree.SubElement(ol, 'li')
- a = etree.SubElement(li, 'a', {'href': zip_path.relpath(item.href, nav_dir_name)})
+ li = etree.SubElement(ol, "li")
+ a = etree.SubElement(
+ li, "a", {"href": zip_path.relpath(item.href, nav_dir_name)}
+ )
a.text = item.title
elif isinstance(item, EpubHtml):
- li = etree.SubElement(ol, 'li')
- a = etree.SubElement(li, 'a', {'href': zip_path.relpath(item.file_name, nav_dir_name)})
+ li = etree.SubElement(ol, "li")
+ a = etree.SubElement(
+ li,
+ "a",
+ {"href": zip_path.relpath(item.file_name, nav_dir_name)},
+ )
a.text = item.title
_create_section(nav, self.book.toc)
@@ -1178,196 +1312,240 @@ def _create_section(itm, items):
# LANDMARKS / GUIDE
# - http://www.idpf.org/epub/30/spec/epub30-contentdocs.html#sec-xhtml-nav-def-types-landmarks
- if len(self.book.guide) > 0 and self.options.get('epub3_landmark'):
+ if len(self.book.guide) > 0 and self.options.get("epub3_landmark"):
# Epub2 guide types do not map completely to epub3 landmark types.
- guide_to_landscape_map = {
- 'notes': 'rearnotes',
- 'text': 'bodymatter'
- }
+ guide_to_landscape_map = {"notes": "rearnotes", "text": "bodymatter"}
- guide_nav = etree.SubElement(body, 'nav', {'{%s}type' % NAMESPACES['EPUB']: 'landmarks'})
+ guide_nav = etree.SubElement(
+ body, "nav", {"{%s}type" % NAMESPACES["EPUB"]: "landmarks"}
+ )
- guide_content_title = etree.SubElement(guide_nav, 'h2')
- guide_content_title.text = self.options.get('landmark_title', 'Guide')
+ guide_content_title = etree.SubElement(guide_nav, "h2")
+ guide_content_title.text = self.options.get("landmark_title", "Guide")
- guild_ol = etree.SubElement(guide_nav, 'ol')
+ guild_ol = etree.SubElement(guide_nav, "ol")
for elem in self.book.guide:
- li_item = etree.SubElement(guild_ol, 'li')
+ li_item = etree.SubElement(guild_ol, "li")
- if 'item' in elem:
- chap = elem.get('item', None)
+ if "item" in elem:
+ chap = elem.get("item", None)
if chap:
_href = chap.file_name
_title = chap.title
else:
- _href = elem.get('href', '')
- _title = elem.get('title', '')
-
- guide_type = elem.get('type', '')
- a_item = etree.SubElement(li_item, 'a', {
- '{%s}type' % NAMESPACES['EPUB']: guide_to_landscape_map.get(guide_type, guide_type),
- 'href': zip_path.relpath(_href, nav_dir_name)
- })
+ _href = elem.get("href", "")
+ _title = elem.get("title", "")
+
+ guide_type = elem.get("type", "")
+ a_item = etree.SubElement(
+ li_item,
+ "a",
+ {
+ "{%s}type"
+ % NAMESPACES["EPUB"]: guide_to_landscape_map.get(
+ guide_type, guide_type
+ ),
+ "href": zip_path.relpath(_href, nav_dir_name),
+ },
+ )
a_item.text = _title
# PAGE-LIST
- if self.options.get('epub3_pages'):
- inserted_pages = get_pages_for_items([item for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT) \
- if not isinstance(item, EpubNav)])
+ if self.options.get("epub3_pages"):
+ inserted_pages = get_pages_for_items(
+ [
+ item
+ for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT)
+ if not isinstance(item, EpubNav)
+ ]
+ )
if len(inserted_pages) > 0:
pagelist_nav = etree.SubElement(
body,
- 'nav',
+ "nav",
{
- '{%s}type' % NAMESPACES['EPUB']: 'page-list',
- 'id': 'pages',
- 'hidden': 'hidden',
- }
+ "{%s}type" % NAMESPACES["EPUB"]: "page-list",
+ "id": "pages",
+ "hidden": "hidden",
+ },
)
- pagelist_content_title = etree.SubElement(pagelist_nav, 'h2')
- pagelist_content_title.text = self.options.get(
- 'pages_title', 'Pages'
- )
-
- pages_ol = etree.SubElement(pagelist_nav, 'ol')
-
+ pagelist_content_title = etree.SubElement(pagelist_nav, "h2")
+ pagelist_content_title.text = self.options.get("pages_title", "Pages")
+ pages_ol = etree.SubElement(pagelist_nav, "ol")
for filename, pageref, label in inserted_pages:
- li_item = etree.SubElement(pages_ol, 'li')
+ li_item = etree.SubElement(pages_ol, "li")
- _href = u'{}#{}'.format(filename, pageref)
+ _href = "{}#{}".format(filename, pageref)
_title = label
- a_item = etree.SubElement(li_item, 'a', {
- 'href': zip_path.relpath(_href, nav_dir_name),
- })
+ a_item = etree.SubElement(
+ li_item,
+ "a",
+ {
+ "href": zip_path.relpath(_href, nav_dir_name),
+ },
+ )
a_item.text = _title
- tree_str = etree.tostring(nav_xml, pretty_print=True, encoding='utf-8', xml_declaration=True)
+ tree_str = etree.tostring(
+ nav_xml, pretty_print=True, encoding="utf-8", xml_declaration=True
+ )
return tree_str
def _get_ncx(self):
# we should be able to setup language for NCX as also
- ncx = parse_string(self.book.get_template('ncx'))
+ ncx = parse_string(self.book.get_template("ncx"))
root = ncx.getroot()
- head = etree.SubElement(root, 'head')
+ head = etree.SubElement(root, "head")
# get this id
- uid = etree.SubElement(head, 'meta', {'content': self.book.uid, 'name': 'dtb:uid'})
- uid = etree.SubElement(head, 'meta', {'content': '0', 'name': 'dtb:depth'})
- uid = etree.SubElement(head, 'meta', {'content': '0', 'name': 'dtb:totalPageCount'})
- uid = etree.SubElement(head, 'meta', {'content': '0', 'name': 'dtb:maxPageNumber'})
-
- doc_title = etree.SubElement(root, 'docTitle')
- title = etree.SubElement(doc_title, 'text')
+ uid = etree.SubElement(
+ head, "meta", {"content": self.book.uid, "name": "dtb:uid"}
+ )
+ uid = etree.SubElement(head, "meta", {"content": "0", "name": "dtb:depth"})
+ uid = etree.SubElement(
+ head, "meta", {"content": "0", "name": "dtb:totalPageCount"}
+ )
+ uid = etree.SubElement(
+ head, "meta", {"content": "0", "name": "dtb:maxPageNumber"}
+ )
+
+ doc_title = etree.SubElement(root, "docTitle")
+ title = etree.SubElement(doc_title, "text")
title.text = self.book.title
-# doc_author = etree.SubElement(root, 'docAuthor')
-# author = etree.SubElement(doc_author, 'text')
-# author.text = 'Name of the person'
+ # doc_author = etree.SubElement(root, 'docAuthor')
+ # author = etree.SubElement(doc_author, 'text')
+ # author.text = 'Name of the person'
# For now just make a very simple navMap
- nav_map = etree.SubElement(root, 'navMap')
+ nav_map = etree.SubElement(root, "navMap")
def _add_play_order(nav_point):
- nav_point.set('playOrder', str(self._play_order['start_from']))
- self._play_order['start_from'] += 1
+ nav_point.set("playOrder", str(self._play_order["start_from"]))
+ self._play_order["start_from"] += 1
def _create_section(itm, items, uid):
for item in items:
if isinstance(item, tuple) or isinstance(item, list):
section, subsection = item[0], item[1]
- np = etree.SubElement(itm, 'navPoint', {
- 'id': section.get_id() if isinstance(section, EpubHtml) else 'sep_%d' % uid
- })
-
- if self._play_order['enabled']:
+ np = etree.SubElement(
+ itm,
+ "navPoint",
+ {
+ "id": (
+ section.get_id()
+ if isinstance(section, EpubHtml)
+ else "sep_%d" % uid
+ )
+ },
+ )
+
+ if self._play_order["enabled"]:
_add_play_order(np)
- nl = etree.SubElement(np, 'navLabel')
- nt = etree.SubElement(nl, 'text')
+ nl = etree.SubElement(np, "navLabel")
+ nt = etree.SubElement(nl, "text")
nt.text = section.title
# CAN NOT HAVE EMPTY SRC HERE
- href = ''
+ href = ""
if isinstance(section, EpubHtml):
href = section.file_name
- elif isinstance(section, Section) and section.href != '':
+ elif isinstance(section, Section) and section.href != "":
href = section.href
elif isinstance(section, Link):
href = section.href
- nc = etree.SubElement(np, 'content', {'src': href})
+ nc = etree.SubElement(np, "content", {"src": href})
uid = _create_section(np, subsection, uid + 1)
elif isinstance(item, Link):
_parent = itm
- _content = _parent.find('content')
+ _content = _parent.find("content")
if _content is not None:
- if _content.get('src') == '':
- _content.set('src', item.href)
+ if _content.get("src") == "":
+ _content.set("src", item.href)
- np = etree.SubElement(itm, 'navPoint', {'id': item.uid})
+ np = etree.SubElement(itm, "navPoint", {"id": item.uid})
- if self._play_order['enabled']:
+ if self._play_order["enabled"]:
_add_play_order(np)
- nl = etree.SubElement(np, 'navLabel')
- nt = etree.SubElement(nl, 'text')
+ nl = etree.SubElement(np, "navLabel")
+ nt = etree.SubElement(nl, "text")
nt.text = item.title
- nc = etree.SubElement(np, 'content', {'src': item.href})
+ nc = etree.SubElement(np, "content", {"src": item.href})
elif isinstance(item, EpubHtml):
_parent = itm
- _content = _parent.find('content')
+ _content = _parent.find("content")
if _content is not None:
- if _content.get('src') == '':
- _content.set('src', item.file_name)
+ if _content.get("src") == "":
+ _content.set("src", item.file_name)
- np = etree.SubElement(itm, 'navPoint', {'id': item.get_id()})
+ np = etree.SubElement(itm, "navPoint", {"id": item.get_id()})
- if self._play_order['enabled']:
+ if self._play_order["enabled"]:
_add_play_order(np)
- nl = etree.SubElement(np, 'navLabel')
- nt = etree.SubElement(nl, 'text')
+ nl = etree.SubElement(np, "navLabel")
+ nt = etree.SubElement(nl, "text")
nt.text = item.title
- nc = etree.SubElement(np, 'content', {'src': item.file_name})
+ nc = etree.SubElement(np, "content", {"src": item.file_name})
return uid
_create_section(nav_map, self.book.toc, 0)
- tree_str = etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True)
+ tree_str = etree.tostring(
+ root, pretty_print=True, encoding="utf-8", xml_declaration=True
+ )
return tree_str
def _write_items(self):
for item in self.book.get_items():
if isinstance(item, EpubNcx):
- self.out.writestr('%s/%s' % (self.book.FOLDER_NAME, item.file_name), self._get_ncx())
+ self.out.writestr(
+ "%s/%s" % (self.book.FOLDER_NAME, item.file_name), self._get_ncx()
+ )
elif isinstance(item, EpubNav):
- self.out.writestr('%s/%s' % (self.book.FOLDER_NAME, item.file_name), self._get_nav(item))
+ self.out.writestr(
+ "%s/%s" % (self.book.FOLDER_NAME, item.file_name),
+ self._get_nav(item),
+ )
elif item.manifest:
- self.out.writestr('%s/%s' % (self.book.FOLDER_NAME, item.file_name), item.get_content())
+ self.out.writestr(
+ "%s/%s" % (self.book.FOLDER_NAME, item.file_name),
+ item.get_content(),
+ )
else:
- self.out.writestr('%s' % item.file_name, item.get_content())
+ self.out.writestr("%s" % item.file_name, item.get_content())
def write(self):
# check for the option allowZip64
- self.out = zipfile.ZipFile(self.file_name, 'w', zipfile.ZIP_DEFLATED, compresslevel=self.options['compresslevel'])
- self.out.writestr('mimetype', 'application/epub+zip', compress_type=zipfile.ZIP_STORED)
+ self.out = zipfile.ZipFile(
+ self.file_name,
+ "w",
+ zipfile.ZIP_DEFLATED,
+ compresslevel=self.options["compresslevel"],
+ )
+ self.out.writestr(
+ "mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED
+ )
self._write_container()
self._write_opf()
@@ -1377,17 +1555,15 @@ def write(self):
class EpubReader(object):
- DEFAULT_OPTIONS = {
- 'ignore_ncx': False
- }
+ DEFAULT_OPTIONS = {"ignore_ncx": False}
def __init__(self, epub_file_name, options=None):
self.file_name = epub_file_name
self.book = EpubBook()
self.zf = None
- self.opf_file = ''
- self.opf_dir = ''
+ self.opf_file = ""
+ self.opf_dir = ""
self.options = dict(self.DEFAULT_OPTIONS)
if options:
@@ -1396,19 +1572,21 @@ def __init__(self, epub_file_name, options=None):
self._check_deprecated()
def _check_deprecated(self):
- if self.options.get('ignore_ncx') is None:
- warnings.warn('In the future version we will turn default option ignore_ncx to True.')
+ if self.options.get("ignore_ncx") is None:
+ warnings.warn(
+ "In the future version we will turn default option ignore_ncx to True."
+ )
def process(self):
# should cache this html parsing so we don't do it for every plugin
- for plg in self.options.get('plugins', []):
- if hasattr(plg, 'after_read'):
+ for plg in self.options.get("plugins", []):
+ if hasattr(plg, "after_read"):
plg.after_read(self.book)
for item in self.book.get_items():
if isinstance(item, EpubHtml):
- for plg in self.options.get('plugins', []):
- if hasattr(plg, 'html_after_read'):
+ for plg in self.options.get("plugins", []):
+ if hasattr(plg, "html_after_read"):
plg.html_after_read(self.book, item)
def load(self):
@@ -1422,31 +1600,34 @@ def read_file(self, name):
return self.zf.read(name)
def _load_container(self):
- meta_inf = self.read_file('META-INF/container.xml')
+ meta_inf = self.read_file("META-INF/container.xml")
tree = parse_string(meta_inf)
- for root_file in tree.findall('.//xmlns:rootfile[@media-type]', namespaces={'xmlns': NAMESPACES['CONTAINERNS']}):
- if root_file.get('media-type') == 'application/oebps-package+xml':
- self.opf_file = root_file.get('full-path')
+ for root_file in tree.findall(
+ ".//xmlns:rootfile[@media-type]",
+ namespaces={"xmlns": NAMESPACES["CONTAINERNS"]},
+ ):
+ if root_file.get("media-type") == "application/oebps-package+xml":
+ self.opf_file = root_file.get("full-path")
self.opf_dir = zip_path.dirname(self.opf_file)
def _load_metadata(self):
container_root = self.container.getroot()
# get epub version
- self.book.version = container_root.get('version', None)
+ self.book.version = container_root.get("version", None)
# get unique-identifier
- if container_root.get('unique-identifier', None):
- self.book.IDENTIFIER_ID = container_root.get('unique-identifier')
+ if container_root.get("unique-identifier", None):
+ self.book.IDENTIFIER_ID = container_root.get("unique-identifier")
# get xml:lang
# get metadata
- metadata = self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'metadata'))
+ metadata = self.container.find("{%s}%s" % (NAMESPACES["OPF"], "metadata"))
nsmap = metadata.nsmap
- nstags = dict((k, '{%s}' % v) for k, v in six.iteritems(nsmap))
- default_ns = nstags.get(None, '')
+ nstags = dict((k, "{%s}" % v) for k, v in six.iteritems(nsmap))
+ default_ns = nstags.get(None, "")
nsdict = dict((v, {}) for v in nsmap.values())
@@ -1460,21 +1641,21 @@ def add_item(ns, tag, value, extra):
for t in metadata:
if not etree.iselement(t) or t.tag is etree.Comment:
continue
- if t.tag == default_ns + 'meta':
- name = t.get('name')
+ if t.tag == default_ns + "meta":
+ name = t.get("name")
others = dict((k, v) for k, v in t.items())
- if name and ':' in name:
- prefix, name = name.split(':', 1)
+ if name and ":" in name:
+ prefix, name = name.split(":", 1)
else:
prefix = None
add_item(t.nsmap.get(prefix, prefix), name, t.text, others)
else:
- tag = t.tag[t.tag.rfind('}') + 1:]
+ tag = t.tag[t.tag.rfind("}") + 1 :]
- if (t.prefix and t.prefix.lower() == 'dc') and tag == 'identifier':
- _id = t.get('id', None)
+ if (t.prefix and t.prefix.lower() == "dc") and tag == "identifier":
+ _id = t.get("id", None)
if _id:
self.book.IDENTIFIER_ID = _id
@@ -1484,77 +1665,87 @@ def add_item(ns, tag, value, extra):
self.book.metadata = nsdict
- titles = self.book.get_metadata('DC', 'title')
+ titles = self.book.get_metadata("DC", "title")
if len(titles) > 0:
self.book.title = titles[0][0]
- for value, others in self.book.get_metadata('DC', 'identifier'):
- if others.get('id') == self.book.IDENTIFIER_ID:
+ for value, others in self.book.get_metadata("DC", "identifier"):
+ if others.get("id") == self.book.IDENTIFIER_ID:
self.book.uid = value
def _load_manifest(self):
- for r in self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'manifest')):
- if r is not None and r.tag != '{%s}item' % NAMESPACES['OPF']:
+ for r in self.container.find("{%s}%s" % (NAMESPACES["OPF"], "manifest")):
+ if r is not None and r.tag != "{%s}item" % NAMESPACES["OPF"]:
continue
- media_type = r.get('media-type')
- _properties = r.get('properties', '')
+ media_type = r.get("media-type")
+ _properties = r.get("properties", "")
if _properties:
- properties = _properties.split(' ')
+ properties = _properties.split(" ")
else:
properties = []
# people use wrong content types
- if media_type == 'image/jpg':
- media_type = 'image/jpeg'
+ if media_type == "image/jpg":
+ media_type = "image/jpeg"
- if media_type == 'application/x-dtbncx+xml':
- ei = EpubNcx(uid=r.get('id'), file_name=unquote(r.get('href')))
+ if media_type == "application/x-dtbncx+xml":
+ ei = EpubNcx(uid=r.get("id"), file_name=unquote(r.get("href")))
ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
- elif media_type == 'application/smil+xml':
- ei = EpubSMIL(uid=r.get('id'), file_name=unquote(r.get('href')))
+ elif media_type == "application/smil+xml":
+ ei = EpubSMIL(uid=r.get("id"), file_name=unquote(r.get("href")))
ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
- elif media_type == 'application/xhtml+xml':
- if 'nav' in properties:
- ei = EpubNav(uid=r.get('id'), file_name=unquote(r.get('href')))
-
- ei.content = self.read_file(zip_path.join(self.opf_dir, r.get('href')))
- elif 'cover' in properties:
+ elif media_type == "application/xhtml+xml":
+ if "nav" in properties:
+ ei = EpubNav(uid=r.get("id"), file_name=unquote(r.get("href")))
+
+ ei.content = self.read_file(
+ zip_path.join(self.opf_dir, r.get("href"))
+ )
+ elif "cover" in properties:
ei = EpubCoverHtml()
- ei.content = self.read_file(zip_path.join(self.opf_dir, unquote(r.get('href'))))
+ ei.content = self.read_file(
+ zip_path.join(self.opf_dir, unquote(r.get("href")))
+ )
else:
ei = EpubHtml()
- ei.id = r.get('id')
- ei.file_name = unquote(r.get('href'))
+ ei.id = r.get("id")
+ ei.file_name = unquote(r.get("href"))
ei.media_type = media_type
- ei.media_overlay = r.get('media-overlay', None)
- ei.media_duration = r.get('duration', None)
- ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
+ ei.media_overlay = r.get("media-overlay", None)
+ ei.media_duration = r.get("duration", None)
+ ei.content = self.read_file(
+ zip_path.join(self.opf_dir, ei.get_name())
+ )
ei.properties = properties
elif media_type in IMAGE_MEDIA_TYPES:
- if 'cover-image' in properties:
- ei = EpubCover(uid=r.get('id'), file_name=unquote(r.get('href')))
+ if "cover-image" in properties:
+ ei = EpubCover(uid=r.get("id"), file_name=unquote(r.get("href")))
ei.media_type = media_type
- ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
+ ei.content = self.read_file(
+ zip_path.join(self.opf_dir, ei.get_name())
+ )
else:
ei = EpubImage()
- ei.id = r.get('id')
- ei.file_name = unquote(r.get('href'))
+ ei.id = r.get("id")
+ ei.file_name = unquote(r.get("href"))
ei.media_type = media_type
- ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
+ ei.content = self.read_file(
+ zip_path.join(self.opf_dir, ei.get_name())
+ )
else:
# different types
ei = EpubItem()
- ei.id = r.get('id')
- ei.file_name = unquote(r.get('href'))
+ ei.id = r.get("id")
+ ei.file_name = unquote(r.get("href"))
ei.media_type = media_type
ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
@@ -1565,34 +1756,33 @@ def _parse_ncx(self, data):
tree = parse_string(data)
tree_root = tree.getroot()
- nav_map = tree_root.find('{%s}navMap' % NAMESPACES['DAISY'])
+ nav_map = tree_root.find("{%s}navMap" % NAMESPACES["DAISY"])
def _get_children(elems, n, nid):
- label, content = '', ''
+ label, content = "", ""
children = []
for a in elems.getchildren():
- if a.tag == '{%s}navLabel' % NAMESPACES['DAISY']:
+ if a.tag == "{%s}navLabel" % NAMESPACES["DAISY"]:
label = a.getchildren()[0].text
- if a.tag == '{%s}content' % NAMESPACES['DAISY']:
- content = a.get('src', '')
- if a.tag == '{%s}navPoint' % NAMESPACES['DAISY']:
- children.append(_get_children(a, n + 1, a.get('id', '')))
+ if a.tag == "{%s}content" % NAMESPACES["DAISY"]:
+ content = a.get("src", "")
+ if a.tag == "{%s}navPoint" % NAMESPACES["DAISY"]:
+ children.append(_get_children(a, n + 1, a.get("id", "")))
if len(children) > 0:
if n == 0:
return children
- return (Section(label, href=content),
- children)
+ return (Section(label, href=content), children)
else:
return Link(content, label, nid)
- self.book.toc = _get_children(nav_map, 0, '')
+ self.book.toc = _get_children(nav_map, 0, "")
- def _parse_nav(self, data, base_path, navtype='toc'):
+ def _parse_nav(self, data, base_path, navtype="toc"):
html_node = parse_html_string(data)
- if navtype == 'toc':
+ if navtype == "toc":
# parsing the table of contents
nav_node = html_node.xpath("//nav[@*='toc']")[0]
else:
@@ -1605,33 +1795,37 @@ def _parse_nav(self, data, base_path, navtype='toc'):
def parse_list(list_node):
items = []
- for item_node in list_node.findall('li'):
+ for item_node in list_node.findall("li"):
- sublist_node = item_node.find('ol')
- link_node = item_node.find('a')
+ sublist_node = item_node.find("ol")
+ link_node = item_node.find("a")
if sublist_node is not None:
title = item_node[0].text_content()
children = parse_list(sublist_node)
if link_node is not None:
- href = zip_path.normpath(zip_path.join(base_path, link_node.get('href')))
+ href = zip_path.normpath(
+ zip_path.join(base_path, link_node.get("href"))
+ )
items.append((Section(title, href=href), children))
else:
items.append((Section(title), children))
elif link_node is not None:
title = link_node.text_content()
- href = zip_path.normpath(zip_path.join(base_path, link_node.get('href')))
+ href = zip_path.normpath(
+ zip_path.join(base_path, link_node.get("href"))
+ )
items.append(Link(href, title))
return items
- if navtype == 'toc':
- self.book.toc = parse_list(nav_node.find('ol'))
+ if navtype == "toc":
+ self.book.toc = parse_list(nav_node.find("ol"))
elif nav_node is not None:
# generate the pages list if there is one
- self.book.pages = parse_list(nav_node.find('ol'))
+ self.book.pages = parse_list(nav_node.find("ol"))
# generate the per-file pages lists
# because of the order of parsing the files, this can't be done
@@ -1642,41 +1836,50 @@ def parse_list(list_node):
htmlfiles[htmlfile.file_name] = htmlfile
for page in self.book.pages:
try:
- (filename, idref) = page.href.split('#')
+ (filename, idref) = page.href.split("#")
except ValueError:
filename = page.href
if filename in htmlfiles:
htmlfiles[filename].pages.append(page)
def _load_spine(self):
- spine = self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'spine'))
+ spine = self.container.find("{%s}%s" % (NAMESPACES["OPF"], "spine"))
- self.book.spine = [(t.get('idref'), t.get('linear', 'yes')) for t in spine]
+ self.book.spine = [(t.get("idref"), t.get("linear", "yes")) for t in spine]
- toc = spine.get('toc', '')
- self.book.set_direction(spine.get('page-progression-direction', None))
+ toc = spine.get("toc", "")
+ self.book.set_direction(spine.get("page-progression-direction", None))
# should read ncx or nav file
- nav_item = next((item for item in self.book.items if isinstance(item, EpubNav)), None)
+ nav_item = next(
+ (item for item in self.book.items if isinstance(item, EpubNav)), None
+ )
if toc:
- if not self.options.get('ignore_ncx') or not nav_item:
+ if not self.options.get("ignore_ncx") or not nav_item:
try:
- ncxFile = self.read_file(zip_path.join(self.opf_dir, self.book.get_item_with_id(toc).get_name()))
+ ncxFile = self.read_file(
+ zip_path.join(
+ self.opf_dir, self.book.get_item_with_id(toc).get_name()
+ )
+ )
except KeyError:
- raise EpubException(-1, 'Can not find ncx file.')
+ raise EpubException(-1, "Can not find ncx file.")
self._parse_ncx(ncxFile)
def _load_guide(self):
- guide = self.container.find('{%s}%s' % (NAMESPACES['OPF'], 'guide'))
+ guide = self.container.find("{%s}%s" % (NAMESPACES["OPF"], "guide"))
if guide is not None:
- self.book.guide = [{'href': t.get('href'), 'title': t.get('title'), 'type': t.get('type')} for t in guide]
+ self.book.guide = [
+ {"href": t.get("href"), "title": t.get("title"), "type": t.get("type")}
+ for t in guide
+ ]
def _load_opf_file(self):
try:
s = self.read_file(self.opf_file)
except KeyError:
- raise EpubException(-1, 'Can not find container file')
+ raise EpubException(-1, "Can not find container file")
self.container = parse_string(s)
@@ -1687,18 +1890,18 @@ def _load_opf_file(self):
# read nav file if found
#
- nav_item = next((item for item in self.book.items if isinstance(item, EpubNav)), None)
+ nav_item = next(
+ (item for item in self.book.items if isinstance(item, EpubNav)), None
+ )
if nav_item:
- if self.options.get('ignore_ncx') or not self.book.toc:
+ if self.options.get("ignore_ncx") or not self.book.toc:
self._parse_nav(
nav_item.content,
zip_path.dirname(nav_item.file_name),
- navtype='toc'
+ navtype="toc",
)
self._parse_nav(
- nav_item.content,
- zip_path.dirname(nav_item.file_name),
- navtype='pages'
+ nav_item.content, zip_path.dirname(nav_item.file_name), navtype="pages"
)
def _load(self):
@@ -1707,7 +1910,7 @@ def _load(self):
class Directory:
def read(self, subname):
- with open(os.path.join(file_name, subname), 'rb') as fp:
+ with open(os.path.join(file_name, subname), "rb") as fp:
return fp.read()
def close(self):
@@ -1716,11 +1919,16 @@ def close(self):
self.zf = Directory()
else:
try:
- self.zf = zipfile.ZipFile(self.file_name, 'r', compression=zipfile.ZIP_DEFLATED, allowZip64=True)
+ self.zf = zipfile.ZipFile(
+ self.file_name,
+ "r",
+ compression=zipfile.ZIP_DEFLATED,
+ allowZip64=True,
+ )
except zipfile.BadZipfile as bz:
- raise EpubException(0, 'Bad Zip file')
+ raise EpubException(0, "Bad Zip file")
except zipfile.LargeZipFile as bz:
- raise EpubException(1, 'Large Zip file')
+ raise EpubException(1, "Large Zip file")
# 1st check metadata
self._load_container()
@@ -1729,9 +1937,9 @@ def close(self):
self.zf.close()
-
# WRITE
+
def write_epub(name, book, options=None):
"""
Creates epub file with the content defined in EpubBook.
@@ -1750,15 +1958,18 @@ def write_epub(name, book, options=None):
try:
epub.write()
except IOError:
- warnings.warn('In the future throwing exceptions while writing will be default behavior.')
+ warnings.warn(
+ "In the future throwing exceptions while writing will be default behavior."
+ )
t, v, tb = sys.exc_info()
- if options and options.get('raise_exceptions'):
+ if options and options.get("raise_exceptions"):
six.reraise(t, v, tb)
else:
return False
return True
+
# READ
diff --git a/ebooklib/plugins/booktype.py b/ebooklib/plugins/booktype.py
index 9842538..1b0033d 100644
--- a/ebooklib/plugins/booktype.py
+++ b/ebooklib/plugins/booktype.py
@@ -17,14 +17,15 @@
from ebooklib.plugins.base import BasePlugin
from ebooklib.utils import parse_html_string
+
class BooktypeLinks(BasePlugin):
- NAME = 'Booktype Links'
+ NAME = "Booktype Links"
def __init__(self, booktype_book):
self.booktype_book = booktype_book
def html_before_write(self, book, chapter):
- from lxml import etree
+ from lxml import etree
try:
from urlparse import urlparse, urljoin
@@ -38,44 +39,44 @@ def html_before_write(self, book, chapter):
root = tree.getroottree()
- if len(root.find('body')) != 0:
- body = tree.find('body')
+ if len(root.find("body")) != 0:
+ body = tree.find("body")
# should also be aware to handle
# ../chapter/
# ../chapter/#reference
# ../chapter#reference
- for _link in body.xpath('//a'):
+ for _link in body.xpath("//a"):
# This is just temporary for the footnotes
- if _link.get('href', '').find('InsertNoteID') != -1:
- _ln = _link.get('href', '')
- i = _ln.find('#')
- _link.set('href', _ln[i:])
+ if _link.get("href", "").find("InsertNoteID") != -1:
+ _ln = _link.get("href", "")
+ i = _ln.find("#")
+ _link.set("href", _ln[i:])
continue
- _u = urlparse(_link.get('href', ''))
+ _u = urlparse(_link.get("href", ""))
# Let us care only for internal links at the moment
- if _u.scheme == '':
- if _u.path != '':
- _link.set('href', '%s.xhtml' % _u.path)
-
- if _u.fragment != '':
- _link.set('href', urljoin(_link.get('href'), '#%s' % _u.fragment))
-
- if _link.get('name') != None:
- _link.set('id', _link.get('name'))
- etree.strip_attributes(_link, 'name')
+ if _u.scheme == "":
+ if _u.path != "":
+ _link.set("href", "%s.xhtml" % _u.path)
- chapter.content = etree.tostring(tree, pretty_print=True, encoding='utf-8')
+ if _u.fragment != "":
+ _link.set(
+ "href", urljoin(_link.get("href"), "#%s" % _u.fragment)
+ )
+ if _link.get("name") != None:
+ _link.set("id", _link.get("name"))
+ etree.strip_attributes(_link, "name")
+ chapter.content = etree.tostring(tree, pretty_print=True, encoding="utf-8")
class BooktypeFootnotes(BasePlugin):
- NAME = 'Booktype Footnotes'
+ NAME = "Booktype Footnotes"
def __init__(self, booktype_book):
self.booktype_book = booktype_book
@@ -92,8 +93,8 @@ def html_before_write(self, book, chapter):
root = tree.getroottree()
- if len(root.find('body')) != 0:
- body = tree.find('body')
+ if len(root.find("body")) != 0:
+ body = tree.find("body")
# 1
# - prvi footnote ^
@@ -101,19 +102,19 @@ def html_before_write(self, book, chapter):
# 1
#
for footnote in body.xpath('//span[@class="InsertNoteMarker"]'):
- footnote_id = footnote.get('id')[:-8]
+ footnote_id = footnote.get("id")[:-8]
a = footnote.getchildren()[0].getchildren()[0]
footnote_text = body.xpath('//li[@id="%s"]' % footnote_id)[0]
- a.attrib['{%s}type' % epub.NAMESPACES['EPUB']] = 'noteref'
- ftn = etree.SubElement(body, 'aside', {'id': footnote_id})
- ftn.attrib['{%s}type' % epub.NAMESPACES['EPUB']] = 'footnote'
- ftn_p = etree.SubElement(ftn, 'p')
+ a.attrib["{%s}type" % epub.NAMESPACES["EPUB"]] = "noteref"
+ ftn = etree.SubElement(body, "aside", {"id": footnote_id})
+ ftn.attrib["{%s}type" % epub.NAMESPACES["EPUB"]] = "footnote"
+ ftn_p = etree.SubElement(ftn, "p")
ftn_p.text = footnote_text.text
old_footnote = body.xpath('//ol[@id="InsertNote_NoteList"]')
if len(old_footnote) > 0:
body.remove(old_footnote[0])
- chapter.content = etree.tostring(tree, pretty_print=True, encoding='utf-8')
+ chapter.content = etree.tostring(tree, pretty_print=True, encoding="utf-8")
diff --git a/ebooklib/plugins/sourcecode.py b/ebooklib/plugins/sourcecode.py
index 4f973a2..a279737 100644
--- a/ebooklib/plugins/sourcecode.py
+++ b/ebooklib/plugins/sourcecode.py
@@ -17,7 +17,8 @@
from ebooklib.plugins.base import BasePlugin
from ebooklib.utils import parse_html_string
-class SourceHighlighter(BasePlugin):
+
+class SourceHighlighter(BasePlugin):
def __init__(self):
pass
@@ -38,24 +39,26 @@ def html_before_write(self, book, chapter):
had_source = False
- if len(root.find('body')) != 0:
- body = tree.find('body')
+ if len(root.find("body")) != 0:
+ body = tree.find("body")
# check for embeded source
for source in body.xpath('//pre[contains(@class,"source-")]'):
- css_class = source.get('class')
+ css_class = source.get("class")
- source_text = (source.text or '') + ''.join([html.tostring(child) for child in source.iterchildren()])
+ source_text = (source.text or "") + "".join(
+ [html.tostring(child) for child in source.iterchildren()]
+ )
- if 'source-python' in css_class:
+ if "source-python" in css_class:
from pygments.lexers import PythonLexer
-# _text = highlight(source_text, PythonLexer(), HtmlFormatter(linenos="inline"))
- _text = highlight(source_text, PythonLexer(), HtmlFormatter())
+ # _text = highlight(source_text, PythonLexer(), HtmlFormatter(linenos="inline"))
+ _text = highlight(source_text, PythonLexer(), HtmlFormatter())
- if 'source-css' in css_class:
+ if "source-css" in css_class:
from pygments.lexers import CssLexer
- _text = highlight(source_text, CssLexer(), HtmlFormatter())
+ _text = highlight(source_text, CssLexer(), HtmlFormatter())
_parent = source.getparent()
_parent.replace(source, etree.XML(_text))
@@ -64,5 +67,4 @@ def html_before_write(self, book, chapter):
if had_source:
chapter.add_link(href="style/code.css", rel="stylesheet", type="text/css")
- chapter.content = etree.tostring(tree, pretty_print=True, encoding='utf-8')
-
+ chapter.content = etree.tostring(tree, pretty_print=True, encoding="utf-8")
diff --git a/ebooklib/plugins/standard.py b/ebooklib/plugins/standard.py
index 61576f9..94ebdef 100644
--- a/ebooklib/plugins/standard.py
+++ b/ebooklib/plugins/standard.py
@@ -23,14 +23,48 @@
# - should also look for the _required_ elements
# http://www.w3.org/html/wg/drafts/html/master/tabular-data.html#the-table-element
-ATTRIBUTES_GLOBAL = ['accesskey', 'class', 'contenteditable', 'contextmenu', 'dir', 'draggable',
- 'dropzone', 'hidden', 'id', 'inert', 'itemid', 'itemprop', 'itemref',
- 'itemscope', 'itemtype', 'lang', 'spellcheck', 'style', 'tabindex',
- 'title', 'translate', 'epub:type']
+ATTRIBUTES_GLOBAL = [
+ "accesskey",
+ "class",
+ "contenteditable",
+ "contextmenu",
+ "dir",
+ "draggable",
+ "dropzone",
+ "hidden",
+ "id",
+ "inert",
+ "itemid",
+ "itemprop",
+ "itemref",
+ "itemscope",
+ "itemtype",
+ "lang",
+ "spellcheck",
+ "style",
+ "tabindex",
+ "title",
+ "translate",
+ "epub:type",
+]
# Remove for now from here
-DEPRECATED_TAGS = ['acronym', 'applet', 'basefont', 'big', 'center', 'dir', 'font', 'frame',
- 'frameset', 'isindex', 'noframes', 's', 'strike', 'tt']
+DEPRECATED_TAGS = [
+ "acronym",
+ "applet",
+ "basefont",
+ "big",
+ "center",
+ "dir",
+ "font",
+ "frame",
+ "frameset",
+ "isindex",
+ "noframes",
+ "s",
+ "strike",
+ "tt",
+]
def leave_only(item, tag_list):
@@ -40,7 +74,7 @@ def leave_only(item, tag_list):
class SyntaxPlugin(BasePlugin):
- NAME = 'Check HTML syntax'
+ NAME = "Check HTML syntax"
def html_before_write(self, book, chapter):
from lxml import etree
@@ -57,130 +91,307 @@ def html_before_write(self, book, chapter):
for tag in DEPRECATED_TAGS:
etree.strip_tags(root, tag)
- head = tree.find('head')
-
+ head = tree.find("head")
+
if head is not None and len(head) != 0:
-
+
for _item in head:
- if _item.tag == 'base':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['href', 'target'])
- elif _item.tag == 'link':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['href', 'crossorigin', 'rel', 'media', 'hreflang', 'type', 'sizes'])
- elif _item.tag == 'title':
- if _item.text == '':
+ if _item.tag == "base":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["href", "target"])
+ elif _item.tag == "link":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + [
+ "href",
+ "crossorigin",
+ "rel",
+ "media",
+ "hreflang",
+ "type",
+ "sizes",
+ ],
+ )
+ elif _item.tag == "title":
+ if _item.text == "":
head.remove(_item)
- elif _item.tag == 'meta':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['name', 'http-equiv', 'content', 'charset'])
+ elif _item.tag == "meta":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + ["name", "http-equiv", "content", "charset"],
+ )
# just remove for now, but really should not be like this
- head.remove(_item)
- elif _item.tag == 'script':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'type', 'charset', 'async', 'defer', 'crossorigin'])
- elif _item.tag == 'source':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'type', 'media'])
- elif _item.tag == 'style':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['media', 'type', 'scoped'])
+ head.remove(_item)
+ elif _item.tag == "script":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + ["src", "type", "charset", "async", "defer", "crossorigin"],
+ )
+ elif _item.tag == "source":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["src", "type", "media"])
+ elif _item.tag == "style":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["media", "type", "scoped"])
else:
leave_only(_item, ATTRIBUTES_GLOBAL)
-
- if len(root.find('body')) != 0:
- body = tree.find('body')
+ if len(root.find("body")) != 0:
+ body = tree.find("body")
for _item in body.iter():
# it is not
#
-
- if _item.tag == 'a':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['href', 'target', 'download', 'rel', 'hreflang', 'type'])
- elif _item.tag == 'area':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['alt', 'coords', 'shape', 'href', 'target', 'download', 'rel', 'hreflang', 'type'])
- elif _item.tag == 'audio':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'crossorigin', 'preload', 'autoplay', 'mediagroup', 'loop', 'muted', 'controls'])
- elif _item.tag == 'blockquote':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['cite'])
- elif _item.tag == 'button':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['autofocus', 'disabled', 'form', 'formaction', 'formenctype', 'formmethod', 'formnovalidate',
- 'formtarget', 'name', 'type', 'value', 'menu'])
- elif _item.tag == 'canvas':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['width', 'height'])
- elif _item.tag == 'canvas':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['width', 'height'])
- elif _item.tag == 'del':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['cite', 'datetime'])
- elif _item.tag == 'details':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['open'])
- elif _item.tag == 'embed':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'type', 'width', 'height'])
- elif _item.tag == 'fieldset':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['disable', 'form', 'name'])
- elif _item.tag == 'details':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['accept-charset', 'action', 'autocomplete', 'enctype', 'method', 'name', 'novalidate', 'target'])
- elif _item.tag == 'iframe':
- leave_only(_item, ATTRIBUTES_GLOBAL + ['src', 'srcdoc', 'name', 'sandbox', 'seamless', 'allowfullscreen', 'width', 'height'])
- elif _item.tag == 'img':
- _src = _item.get('src', '').lower()
- if _src.startswith('http://') or _src.startswith('https://'):
- if 'remote-resources' not in chapter.properties:
- chapter.properties.append('remote-resources')
+
+ if _item.tag == "a":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + ["href", "target", "download", "rel", "hreflang", "type"],
+ )
+ elif _item.tag == "area":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + [
+ "alt",
+ "coords",
+ "shape",
+ "href",
+ "target",
+ "download",
+ "rel",
+ "hreflang",
+ "type",
+ ],
+ )
+ elif _item.tag == "audio":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + [
+ "src",
+ "crossorigin",
+ "preload",
+ "autoplay",
+ "mediagroup",
+ "loop",
+ "muted",
+ "controls",
+ ],
+ )
+ elif _item.tag == "blockquote":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["cite"])
+ elif _item.tag == "button":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + [
+ "autofocus",
+ "disabled",
+ "form",
+ "formaction",
+ "formenctype",
+ "formmethod",
+ "formnovalidate",
+ "formtarget",
+ "name",
+ "type",
+ "value",
+ "menu",
+ ],
+ )
+ elif _item.tag == "canvas":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["width", "height"])
+ elif _item.tag == "canvas":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["width", "height"])
+ elif _item.tag == "del":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["cite", "datetime"])
+ elif _item.tag == "details":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["open"])
+ elif _item.tag == "embed":
+ leave_only(
+ _item, ATTRIBUTES_GLOBAL + ["src", "type", "width", "height"]
+ )
+ elif _item.tag == "fieldset":
+ leave_only(_item, ATTRIBUTES_GLOBAL + ["disable", "form", "name"])
+ elif _item.tag == "details":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + [
+ "accept-charset",
+ "action",
+ "autocomplete",
+ "enctype",
+ "method",
+ "name",
+ "novalidate",
+ "target",
+ ],
+ )
+ elif _item.tag == "iframe":
+ leave_only(
+ _item,
+ ATTRIBUTES_GLOBAL
+ + [
+ "src",
+ "srcdoc",
+ "name",
+ "sandbox",
+ "seamless",
+ "allowfullscreen",
+ "width",
+ "height",
+ ],
+ )
+ elif _item.tag == "img":
+ _src = _item.get("src", "").lower()
+ if _src.startswith("http://") or _src.startswith("https://"):
+ if "remote-resources" not in chapter.properties:
+ chapter.properties.append("remote-resources")
# THIS DOES NOT WORK, ONLY VIDEO AND AUDIO FILES CAN BE REMOTE RESOURCES
# THAT MEANS I SHOULD ALSO CATCH 0:
text = headers[0].text_content().strip()
@@ -97,20 +99,20 @@ def get_pages(item):
pages = []
for elem in body.iter():
- if 'epub:type' in elem.attrib:
- if elem.get('id') is not None:
+ if "epub:type" in elem.attrib:
+ if elem.get("id") is not None:
_text = None
-
- if elem.text is not None and elem.text.strip() != '':
+
+ if elem.text is not None and elem.text.strip() != "":
_text = elem.text.strip()
if _text is None:
- _text = elem.get('aria-label')
+ _text = elem.get("aria-label")
if _text is None:
_text = get_headers(elem)
- pages.append((item.get_name(), elem.get('id'), _text or elem.get('id')))
+ pages.append((item.get_name(), elem.get("id"), _text or elem.get("id")))
return pages
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..db272a8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+lxml
+six
"""
+)
-CHAPTER_XML = six.b('''''')
+CHAPTER_XML = six.b(
+ """"""
+)
-COVER_XML = six.b('''
+COVER_XML = six.b(
+ """