diff --git a/ebooklib/epub.py b/ebooklib/epub.py index 34167cc..fcb4da6 100644 --- a/ebooklib/epub.py +++ b/ebooklib/epub.py @@ -15,6 +15,7 @@ # along with EbookLib. If not, see . import zipfile +import io import six import logging import uuid @@ -36,7 +37,7 @@ # Version of EPUB library -VERSION = (0, 18, 1) +VERSION = (0, 18, 2) NAMESPACES = {'XML': 'http://www.w3.org/XML/1998/namespace', 'EPUB': 'http://www.idpf.org/2007/ops', @@ -120,7 +121,7 @@ class EpubItem(object): Base class for the items in a book. """ - def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), manifest=True): + def __init__(self, uid=None, file_name='', media_type='', content=six.BytesIO(), manifest=True): """ :Args: - uid: Unique identifier for this item (optional) @@ -132,11 +133,24 @@ def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), man self.id = uid self.file_name = file_name self.media_type = media_type - self.content = content + self.content=None + self.set_content(content) self.is_linear = True self.manifest = manifest self.book = None + def write(self, buff): + if buff == None: + pass + elif isinstance(buff, str): + self.content.write(buff.encode()) + elif isinstance(buff, six.binary_type): + self.content.write(buff) + elif isinstance(buff, io.IOBase): + self.content.write(buff.read()) + else: + raise ValueError(f"content type {type(buff)} not recognized") + def get_id(self): """ @@ -197,7 +211,8 @@ def get_content(self, default=six.b('')): :Returns: Returns content of the item. """ - return self.content or default + self.content.seek(0) + return self.content.read() or default def set_content(self, content): """ @@ -206,7 +221,13 @@ def set_content(self, content): :Args: - content: Content value """ - self.content = content + if self.content: + self.content.close() + if isinstance(content, io.IOBase): + self.content = content + else: + self.content = six.BytesIO() + self.write(content) def __str__(self): return '' % self.id @@ -349,7 +370,7 @@ def get_body_content(self): """ try: - html_tree = parse_html_string(self.content) + html_tree = parse_html_string(self.get_content()) except: return '' @@ -392,7 +413,7 @@ def get_content(self, default=None): # try: - html_tree = parse_html_string(self.content) + html_tree = parse_html_string(super().get_content()) except: return '' @@ -472,7 +493,7 @@ def get_content(self): Returns content of this document. """ - self.content = self.book.get_template('cover') + self.set_content(self.book.get_template('cover')) tree = parse_string(super(EpubCoverHtml, self).get_content()) tree_root = tree.getroot() @@ -649,7 +670,7 @@ def set_cover(self, file_name, content, create_page=True): # as it is now, it can only be called once c0 = EpubCover(file_name=file_name) - c0.content = content + c0.write(content) self.add_item(c0) if create_page: @@ -1416,7 +1437,7 @@ def load(self): def read_file(self, name): # Raises KeyError name = zip_path.normpath(name) - return self.zf.read(name) + return self.zf.open(name) def _load_container(self): meta_inf = self.read_file('META-INF/container.xml') @@ -1509,20 +1530,20 @@ def _load_manifest(self): if media_type == 'application/x-dtbncx+xml': ei = EpubNcx(uid=r.get('id'), file_name=unquote(r.get('href'))) - ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name)) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.file_name))) elif media_type == 'application/smil+xml': ei = EpubSMIL(uid=r.get('id'), file_name=unquote(r.get('href'))) - ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name)) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.file_name))) elif media_type == 'application/xhtml+xml': if 'nav' in properties: ei = EpubNav(uid=r.get('id'), file_name=unquote(r.get('href'))) - ei.content = self.read_file(zip_path.join(self.opf_dir, r.get('href'))) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, r.get('href')))) elif 'cover' in properties: ei = EpubCoverHtml() - ei.content = self.read_file(zip_path.join(self.opf_dir, unquote(r.get('href')))) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, unquote(r.get('href'))))) else: ei = EpubHtml() @@ -1531,21 +1552,21 @@ def _load_manifest(self): ei.media_type = media_type ei.media_overlay = r.get('media-overlay', None) ei.media_duration = r.get('duration', None) - ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name())) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name()))) ei.properties = properties elif media_type in IMAGE_MEDIA_TYPES: if 'cover-image' in properties: ei = EpubCover(uid=r.get('id'), file_name=unquote(r.get('href'))) ei.media_type = media_type - ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name())) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name()))) else: ei = EpubImage() ei.id = r.get('id') ei.file_name = unquote(r.get('href')) ei.media_type = media_type - ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name())) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name()))) else: # different types ei = EpubItem() @@ -1554,7 +1575,7 @@ def _load_manifest(self): ei.file_name = unquote(r.get('href')) ei.media_type = media_type - ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name())) + ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name()))) self.book.add_item(ei) @@ -1693,7 +1714,7 @@ def _load_opf_file(self): navtype='toc' ) self._parse_nav( - nav_item.content, + nav_item.get_content(), zip_path.dirname(nav_item.file_name), navtype='pages' ) @@ -1706,7 +1727,8 @@ class Directory: def read(self, subname): with open(os.path.join(file_name, subname), 'rb') as fp: return fp.read() - + def open(self, subname): + return open(os.path.join(file_name, subname), 'rb') def close(self): pass diff --git a/samples/01_basic_create/create.py b/samples/01_basic_create/create.py index d099ed4..45987a4 100644 --- a/samples/01_basic_create/create.py +++ b/samples/01_basic_create/create.py @@ -15,11 +15,11 @@ # intro chapter c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en') - c1.content=u'

Introduction

Introduction paragraph where i explain what is happening.

' + c1.set_content(u'

Introduction

Introduction paragraph where i explain what is happening.

') # about chapter c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml') - c2.content='

About this book

Helou, this is my book! There are many books, but this one is mine.

' + c2.content.write(b'

About this book

Helou, this is my book! There are many books, but this one is mine.

') # add chapters to the book book.add_item(c1) diff --git a/samples/02_cover_create/create.py b/samples/02_cover_create/create.py index ee3cf75..ab42f27 100644 --- a/samples/02_cover_create/create.py +++ b/samples/02_cover_create/create.py @@ -18,11 +18,11 @@ # intro chapter c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='hr') - c1.content=u'

Introduction

Introduction paragraph where i explain what is happening.

' + c1.set_content(u'

Introduction

Introduction paragraph where i explain what is happening.

') # about chapter c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml') - c2.content='

About this book

Helou, this is my book! There are many books, but this one is mine.

Cover Image

' + c2.content.write(b'

About this book

Helou, this is my book! There are many books, but this one is mine.

Cover Image

') # add chapters to the book book.add_item(c1) diff --git a/samples/03_advanced_create/create.py b/samples/03_advanced_create/create.py index 909046b..52b8533 100644 --- a/samples/03_advanced_create/create.py +++ b/samples/03_advanced_create/create.py @@ -15,7 +15,7 @@ # intro chapter c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='hr') - c1.content=u'

Introduction

Introduction paragraph where i explain what is happening.

' + c1.set_content(u'

Introduction

Introduction paragraph where i explain what is happening.

') # defube style style = '''BODY { text-align: justify;}''' @@ -26,7 +26,7 @@ # about chapter c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml') - c2.content='

About this book

Helou, this is my book! There are many books, but this one is mine.

' + c2.write('

About this book

Helou, this is my book! There are many books, but this one is mine.

') c2.set_language('hr') c2.properties.append('rendition:layout-pre-paginated rendition:orientation-landscape rendition:spread-none') c2.add_item(default_css) diff --git a/samples/04_markdown_parse/epub2markdown.py b/samples/04_markdown_parse/epub2markdown.py index 19df3e1..939192c 100755 --- a/samples/04_markdown_parse/epub2markdown.py +++ b/samples/04_markdown_parse/epub2markdown.py @@ -20,11 +20,11 @@ proc = subprocess.Popen(['pandoc', '-f', 'html', '-t', 'markdown', '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - content, error = proc.communicate(item.content) + content, error = proc.communicate(item.get_content()) file_name = os.path.splitext(item.file_name)[0] + '.md' else: file_name = item.file_name - content = item.content + content = item.get_content() # create needed directories dir_name = '{0}/{1}'.format(base_name, os.path.dirname(file_name)) diff --git a/samples/07_pagebreaks/create.py b/samples/07_pagebreaks/create.py index 9c6ad52..378a145 100644 --- a/samples/07_pagebreaks/create.py +++ b/samples/07_pagebreaks/create.py @@ -17,22 +17,22 @@ # build the chapter HTML and add the page break c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en') - c1.content = u'

Introduction

1This chapter has a visible page number.

2Something else now.

' + c1.set_content(u'

Introduction

1This chapter has a visible page number.

2Something else now.

') c2 = epub.EpubHtml(title='Chapter the Second', file_name='chap02.xhtml', lang='en') - c2.content = u'

Chapter the Second

This chapter has two page breaks, both with invisible page numbers.

' + c2.content.write(u'

Chapter the Second

This chapter has two page breaks, both with invisible page numbers.

'.encode()) # Add invisible page numbers that match the printed text, for accessibility - c2.content += create_pagebreak("2") + c2.write(create_pagebreak("2")) # You can add more content after the page break - c2.content += u'

This is the second page in the second chapter, after the invisible page break.

' + c2.write(u'

This is the second page in the second chapter, after the invisible page break.

') # Add invisible page numbers that match the printed text, for accessibility - c2.content += create_pagebreak("3", label="Page 3") + c2.write(create_pagebreak("3", label="Page 3")) # close the chapter - c2.content += u'' + c2.write(u'') # add chapters to the book book.add_item(c1) diff --git a/samples/10_large_files/README.md b/samples/10_large_files/README.md new file mode 100644 index 0000000..4ce1066 --- /dev/null +++ b/samples/10_large_files/README.md @@ -0,0 +1,8 @@ +Large create +============ + +Creates LARGE EPUB files + +## Start + + python create.py \ No newline at end of file diff --git a/samples/10_large_files/create.py b/samples/10_large_files/create.py new file mode 100644 index 0000000..bf4391e --- /dev/null +++ b/samples/10_large_files/create.py @@ -0,0 +1,87 @@ +# coding=utf-8 + +from ebooklib import epub +from tempfile import TemporaryFile + + +if __name__ == '__main__': + book = epub.EpubBook() + + # add metadata + book.set_identifier('sample123456') + book.set_title('Sample book') + book.set_language('en') + + book.add_author('Aleksandar Erkalovic') + + # intro chapter + c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en') + c1.set_content(u'

Introduction

Introduction paragraph where i explain what is happening.

') + + # about chapter + c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml', content=TemporaryFile()) + c2.write('

About this book

') + for i in range(1024): + c2.write('

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin rutrum erat ipsum, at fringilla sem sodales ut. Donec rutrum condimentum leo, non convallis ipsum sodales vel. Sed vulputate, quam dapibus pharetra viverra, nunc magna fermentum ligula, sed placerat enim diam id nisi. Sed justo nunc, placerat vel rutrum eget, lacinia quis ante. Maecenas semper turpis lectus, sed sollicitudin diam feugiat vitae. Mauris massa felis, cursus non enim a, consequat pulvinar mauris. Proin scelerisque neque felis, in fringilla ligula tristique ac. Phasellus interdum lacus neque, ac efficitur nibh consequat a. Donec enim enim, commodo sed finibus in, dignissim sit amet arcu. Nam mollis eu ipsum sed ornare. Maecenas non ipsum molestie, volutpat nulla quis, accumsan arcu. Cras imperdiet augue interdum ipsum laoreet malesuada vitae consectetur ipsum. Quisque vitae tortor augue.

') + + # add chapters to the book + book.add_item(c1) + book.add_item(c2) + + # create table of contents + # - add section + # - add auto created links to chapters + + book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'), + (epub.Section('Languages'), + (c1, c2)) + ) + + # add navigation files + book.add_item(epub.EpubNcx()) + book.add_item(epub.EpubNav()) + + # define css style + style = ''' +@namespace epub "http://www.idpf.org/2007/ops"; + +body { + font-family: Cambria, Liberation Serif, Bitstream Vera Serif, Georgia, Times, Times New Roman, serif; +} + +h2 { + text-align: left; + text-transform: uppercase; + font-weight: 200; +} + +ol { + list-style-type: none; +} + +ol > li:first-child { + margin-top: 0.3em; +} + + +nav[epub|type~='toc'] > ol > li > ol { + list-style-type:square; +} + + +nav[epub|type~='toc'] > ol > li > ol > li { + margin-top: 0.3em; +} + +''' + + # add css file + nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style) + book.add_item(nav_css) + + # create spine + book.spine = ['nav', c1, c2] + + # create epub file + epub.write_epub('test.epub', book, {}) +