Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored EpubItem content to be IOBase. #277

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 42 additions & 20 deletions ebooklib/epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# along with EbookLib. If not, see <http://www.gnu.org/licenses/>.

import zipfile
import io
import six
import logging
import uuid
Expand All @@ -36,7 +37,7 @@


# Version of EPUB library
VERSION = (0, 18, 1)
VERSION = (0, 18, 2)

NAMESPACES = {'XML': 'http://www.w3.org/XML/1998/namespace',
'EPUB': 'http://www.idpf.org/2007/ops',
Expand Down Expand Up @@ -120,7 +121,7 @@ class EpubItem(object):
Base class for the items in a book.
"""

def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), manifest=True):
def __init__(self, uid=None, file_name='', media_type='', content=six.BytesIO(), manifest=True):
"""
:Args:
- uid: Unique identifier for this item (optional)
Expand All @@ -132,11 +133,24 @@ def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), man
self.id = uid
self.file_name = file_name
self.media_type = media_type
self.content = content
self.content=None
self.set_content(content)
self.is_linear = True
self.manifest = manifest

self.book = None
def write(self, buff):
    """
    Append data to the content stream of this item.

    :Args:
      - buff: Data to append. None is ignored, a text string is encoded as
        UTF-8, a byte string is written as-is, and a readable stream
        (io.IOBase) has its remaining data read and appended.

    :Raises:
      ValueError if buff is of any other type.
    """
    if buff is None:
        return

    if isinstance(buff, io.IOBase):
        data = buff.read()
        if data is None:
            # Raw non-blocking streams report "no data available" as None.
            return
        if not isinstance(data, bytes):
            # Text-mode streams yield text; the content stream is binary.
            data = data.encode('utf-8')
        self.content.write(data)
        return

    # Test for bytes before text: on Python 2 ``str`` is the byte type, so a
    # plain ``str`` check would try to encode data that is already binary.
    if isinstance(buff, bytes):
        self.content.write(buff)
    elif isinstance(buff, six.text_type):
        self.content.write(buff.encode('utf-8'))
    else:
        raise ValueError('content type %s not recognized' % type(buff))


def get_id(self):
"""
Expand Down Expand Up @@ -197,7 +211,8 @@ def get_content(self, default=six.b('')):
:Returns:
Returns content of the item.
"""
return self.content or default
self.content.seek(0)
return self.content.read() or default

def set_content(self, content):
"""
Expand All @@ -206,7 +221,13 @@ def set_content(self, content):
:Args:
- content: Content value
"""
self.content = content
if self.content:
self.content.close()
if isinstance(content, io.IOBase):
self.content = content
else:
self.content = six.BytesIO()
self.write(content)

def __str__(self):
return '<EpubItem:%s>' % self.id
Expand Down Expand Up @@ -349,7 +370,7 @@ def get_body_content(self):
"""

try:
html_tree = parse_html_string(self.content)
html_tree = parse_html_string(self.get_content())
except:
return ''

Expand Down Expand Up @@ -392,7 +413,7 @@ def get_content(self, default=None):
# <meta charset="utf-8" />

try:
html_tree = parse_html_string(self.content)
html_tree = parse_html_string(super().get_content())
except:
return ''

Expand Down Expand Up @@ -472,7 +493,7 @@ def get_content(self):
Returns content of this document.
"""

self.content = self.book.get_template('cover')
self.set_content(self.book.get_template('cover'))

tree = parse_string(super(EpubCoverHtml, self).get_content())
tree_root = tree.getroot()
Expand Down Expand Up @@ -649,7 +670,7 @@ def set_cover(self, file_name, content, create_page=True):

# as it is now, it can only be called once
c0 = EpubCover(file_name=file_name)
c0.content = content
c0.write(content)
self.add_item(c0)

if create_page:
Expand Down Expand Up @@ -1416,7 +1437,7 @@ def load(self):
def read_file(self, name):
# Raises KeyError
name = zip_path.normpath(name)
return self.zf.read(name)
return self.zf.open(name)

def _load_container(self):
meta_inf = self.read_file('META-INF/container.xml')
Expand Down Expand Up @@ -1509,20 +1530,20 @@ def _load_manifest(self):
if media_type == 'application/x-dtbncx+xml':
ei = EpubNcx(uid=r.get('id'), file_name=unquote(r.get('href')))

ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.file_name)))
elif media_type == 'application/smil+xml':
ei = EpubSMIL(uid=r.get('id'), file_name=unquote(r.get('href')))

ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.file_name)))
elif media_type == 'application/xhtml+xml':
if 'nav' in properties:
ei = EpubNav(uid=r.get('id'), file_name=unquote(r.get('href')))

ei.content = self.read_file(zip_path.join(self.opf_dir, r.get('href')))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, r.get('href'))))
elif 'cover' in properties:
ei = EpubCoverHtml()

ei.content = self.read_file(zip_path.join(self.opf_dir, unquote(r.get('href'))))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, unquote(r.get('href')))))
else:
ei = EpubHtml()

Expand All @@ -1531,21 +1552,21 @@ def _load_manifest(self):
ei.media_type = media_type
ei.media_overlay = r.get('media-overlay', None)
ei.media_duration = r.get('duration', None)
ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))
ei.properties = properties
elif media_type in IMAGE_MEDIA_TYPES:
if 'cover-image' in properties:
ei = EpubCover(uid=r.get('id'), file_name=unquote(r.get('href')))

ei.media_type = media_type
ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))
else:
ei = EpubImage()

ei.id = r.get('id')
ei.file_name = unquote(r.get('href'))
ei.media_type = media_type
ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))
else:
# different types
ei = EpubItem()
Expand All @@ -1554,7 +1575,7 @@ def _load_manifest(self):
ei.file_name = unquote(r.get('href'))
ei.media_type = media_type

ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))

self.book.add_item(ei)

Expand Down Expand Up @@ -1693,7 +1714,7 @@ def _load_opf_file(self):
navtype='toc'
)
self._parse_nav(
nav_item.content,
nav_item.get_content(),
zip_path.dirname(nav_item.file_name),
navtype='pages'
)
Expand All @@ -1706,7 +1727,8 @@ class Directory:
# Return the complete contents of `subname` as bytes. The file is opened in
# binary mode and closed again by the `with` block before returning.
# NOTE(review): `file_name` is not defined in this method - presumably it is
# the directory path captured from the enclosing scope; confirm.
def read(self, subname):
with open(os.path.join(file_name, subname), 'rb') as fp:
return fp.read()

# Return an open binary-mode file object for `subname` instead of its bytes.
# The handle is not closed here, so the caller is responsible for closing it.
# NOTE(review): `file_name` is not defined in this method - presumably it is
# the directory path captured from the enclosing scope; confirm.
def open(self, subname):
return open(os.path.join(file_name, subname), 'rb')
# Intentionally a no-op: this object holds no handle of its own to release.
def close(self):
pass

Expand Down
4 changes: 2 additions & 2 deletions samples/01_basic_create/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@

# intro chapter
c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en')
c1.content=u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>'
c1.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')

# about chapter
c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml')
c2.content='<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>'
c2.content.write(b'<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>')

# add chapters to the book
book.add_item(c1)
Expand Down
4 changes: 2 additions & 2 deletions samples/02_cover_create/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@

# intro chapter
c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='hr')
c1.content=u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>'
c1.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')

# about chapter
c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml')
c2.content='<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p><p><img src="image.jpg" alt="Cover Image"/></p>'
c2.content.write(b'<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p><p><img src="image.jpg" alt="Cover Image"/></p>')

# add chapters to the book
book.add_item(c1)
Expand Down
4 changes: 2 additions & 2 deletions samples/03_advanced_create/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# intro chapter
c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='hr')
c1.content=u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>'
c1.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')

# define style
style = '''BODY { text-align: justify;}'''
Expand All @@ -26,7 +26,7 @@

# about chapter
c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml')
c2.content='<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>'
c2.write('<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>')
c2.set_language('hr')
c2.properties.append('rendition:layout-pre-paginated rendition:orientation-landscape rendition:spread-none')
c2.add_item(default_css)
Expand Down
4 changes: 2 additions & 2 deletions samples/04_markdown_parse/epub2markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
proc = subprocess.Popen(['pandoc', '-f', 'html', '-t', 'markdown', '-'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
content, error = proc.communicate(item.content)
content, error = proc.communicate(item.get_content())
file_name = os.path.splitext(item.file_name)[0] + '.md'
else:
file_name = item.file_name
content = item.content
content = item.get_content()

# create needed directories
dir_name = '{0}/{1}'.format(base_name, os.path.dirname(file_name))
Expand Down
12 changes: 6 additions & 6 deletions samples/07_pagebreaks/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,22 @@

# build the chapter HTML and add the page break
c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en')
c1.content = u'<h1>Introduction</h1><p><span id="page1" epub:type="pagebreak">1</span>This chapter has a visible page number.</p><p><span id="page2" epub:type="pagebreak">2</span>Something else now.</p>'
c1.set_content(u'<h1>Introduction</h1><p><span id="page1" epub:type="pagebreak">1</span>This chapter has a visible page number.</p><p><span id="page2" epub:type="pagebreak">2</span>Something else now.</p>')

c2 = epub.EpubHtml(title='Chapter the Second', file_name='chap02.xhtml', lang='en')
c2.content = u'<html><head></head><body><h1>Chapter the Second</h1><p>This chapter has two page breaks, both with invisible page numbers.</p>'
c2.content.write(u'<html><head></head><body><h1>Chapter the Second</h1><p>This chapter has two page breaks, both with invisible page numbers.</p>'.encode())

# Add invisible page numbers that match the printed text, for accessibility
c2.content += create_pagebreak("2")
c2.write(create_pagebreak("2"))

# You can add more content after the page break
c2.content += u'<p>This is the second page in the second chapter, after the invisible page break.</p>'
c2.write(u'<p>This is the second page in the second chapter, after the invisible page break.</p>')

# Add invisible page numbers that match the printed text, for accessibility
c2.content += create_pagebreak("3", label="Page 3")
c2.write(create_pagebreak("3", label="Page 3"))

# close the chapter
c2.content += u'</body></html>'
c2.write(u'</body></html>')

# add chapters to the book
book.add_item(c1)
Expand Down
8 changes: 8 additions & 0 deletions samples/10_large_files/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Large create
============

Creates LARGE EPUB files

## Start

python create.py
87 changes: 87 additions & 0 deletions samples/10_large_files/create.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# coding=utf-8
"""Sample: build an EPUB whose second chapter is backed by a temporary file."""

from ebooklib import epub
from tempfile import TemporaryFile


if __name__ == '__main__':
    book = epub.EpubBook()

    # book-level metadata
    book.set_identifier('sample123456')
    book.set_title('Sample book')
    book.set_language('en')

    book.add_author('Aleksandar Erkalovic')

    # first chapter: small, content handed over in one call
    intro_chapter = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en')
    intro_chapter.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')

    # second chapter: written piece by piece into a temporary file
    about_chapter = epub.EpubHtml(title='About this book', file_name='about.xhtml', content=TemporaryFile())
    about_chapter.write('<h1>About this book</h1>')
    paragraph = '<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin rutrum erat ipsum, at fringilla sem sodales ut. Donec rutrum condimentum leo, non convallis ipsum sodales vel. Sed vulputate, quam dapibus pharetra viverra, nunc magna fermentum ligula, sed placerat enim diam id nisi. Sed justo nunc, placerat vel rutrum eget, lacinia quis ante. Maecenas semper turpis lectus, sed sollicitudin diam feugiat vitae. Mauris massa felis, cursus non enim a, consequat pulvinar mauris. Proin scelerisque neque felis, in fringilla ligula tristique ac. Phasellus interdum lacus neque, ac efficitur nibh consequat a. Donec enim enim, commodo sed finibus in, dignissim sit amet arcu. Nam mollis eu ipsum sed ornare. Maecenas non ipsum molestie, volutpat nulla quis, accumsan arcu. Cras imperdiet augue interdum ipsum laoreet malesuada vitae consectetur ipsum. Quisque vitae tortor augue.</p>'
    for _ in range(1024):
        about_chapter.write(paragraph)

    # register both chapters with the book
    book.add_item(intro_chapter)
    book.add_item(about_chapter)

    # table of contents: one direct link, then a section holding both chapters
    languages_section = (epub.Section('Languages'), (intro_chapter, about_chapter))
    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'), languages_section)

    # navigation documents
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # stylesheet for the navigation document
    nav_style = '''
@namespace epub "http://www.idpf.org/2007/ops";

body {
font-family: Cambria, Liberation Serif, Bitstream Vera Serif, Georgia, Times, Times New Roman, serif;
}

h2 {
text-align: left;
text-transform: uppercase;
font-weight: 200;
}

ol {
list-style-type: none;
}

ol > li:first-child {
margin-top: 0.3em;
}


nav[epub|type~='toc'] > ol > li > ol {
list-style-type:square;
}


nav[epub|type~='toc'] > ol > li > ol > li {
margin-top: 0.3em;
}

'''

    # ship the stylesheet as a regular item of the book
    nav_stylesheet = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=nav_style)
    book.add_item(nav_stylesheet)

    # reading order
    book.spine = ['nav', intro_chapter, about_chapter]

    # write the finished book to disk
    epub.write_epub('test.epub', book, {})