aerkalov · roycabaniss · Feb 19, 2023 · Feb 20, 2023 · Oct 19, 2023
diff --git a/ebooklib/epub.py b/ebooklib/epub.py
@@ -15,6 +15,7 @@
 # along with EbookLib.  If not, see <http://www.gnu.org/licenses/>.
 
 import zipfile
+import io
 import six
 import logging
 import uuid
@@ -36,7 +37,7 @@
 
 
 # Version of EPUB library
-VERSION = (0, 18, 1)
+VERSION = (0, 18, 2)
 
 NAMESPACES = {'XML': 'http://www.w3.org/XML/1998/namespace',
               'EPUB': 'http://www.idpf.org/2007/ops',
@@ -120,7 +121,7 @@ class EpubItem(object):
     Base class for the items in a book.
     """
 
-    def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), manifest=True):
+    def __init__(self, uid=None, file_name='', media_type='', content=None, manifest=True):
         """
         :Args:
           - uid: Unique identifier for this item (optional)
@@ -132,11 +133,24 @@ def __init__(self, uid=None, file_name='', media_type='', content=six.b(''), man
         self.id = uid
         self.file_name = file_name
         self.media_type = media_type
-        self.content = content
+        self.content=None
+        self.set_content(content)
         self.is_linear = True
         self.manifest = manifest
 
         self.book = None
+
+    def write(self, buff):
+        """Write text (UTF-8 encoded), bytes or a readable stream's data to the content; None is a no-op."""
+        if buff is None:
+            return
+        if hasattr(buff, 'read'):  # any file-like object, not only io.IOBase subclasses
+            buff = buff.read()
+        if isinstance(buff, six.text_type):
+            buff = buff.encode('utf-8')
+        elif not isinstance(buff, six.binary_type):
+            raise ValueError('content type %s not recognized' % type(buff))
+        self.content.write(buff)
 
     def get_id(self):
         """
@@ -197,7 +211,8 @@ def get_content(self, default=six.b('')):
         :Returns:
           Returns content of the item.
         """
-        return self.content or default
+        self.content.seek(0)
+        return self.content.read() or default
 
     def set_content(self, content):
         """
@@ -206,7 +221,13 @@ def set_content(self, content):
         :Args:
           - content: Content value
         """
-        self.content = content
+        if self.content is not None and self.content is not content:
+            self.content.close()  # release the stream being replaced, never the one being installed
+        if isinstance(content, io.IOBase) or hasattr(content, 'read'):
+            self.content = content  # keep file-like objects as the backing stream
+        else:
+            self.content = six.BytesIO()
+            self.write(content)
 
     def __str__(self):
         return '<EpubItem:%s>' % self.id
@@ -349,7 +370,7 @@ def get_body_content(self):
         """
 
         try:
-            html_tree = parse_html_string(self.content)
+            html_tree = parse_html_string(self.get_content())
         except:
             return ''
 
@@ -392,7 +413,7 @@ def get_content(self, default=None):
         #  <meta charset="utf-8" />
 
         try:
-            html_tree = parse_html_string(self.content)
+            html_tree = parse_html_string(super().get_content())
         except:
             return ''
 
@@ -472,7 +493,7 @@ def get_content(self):
           Returns content of this document.
         """
 
-        self.content = self.book.get_template('cover')
+        self.set_content(self.book.get_template('cover'))
 
         tree = parse_string(super(EpubCoverHtml, self).get_content())
         tree_root = tree.getroot()
@@ -649,7 +670,7 @@ def set_cover(self, file_name, content, create_page=True):
 
         # as it is now, it can only be called once
         c0 = EpubCover(file_name=file_name)
-        c0.content = content
+        c0.write(content)
         self.add_item(c0)
 
         if create_page:
@@ -1416,7 +1437,7 @@ def load(self):
     def read_file(self, name):
         # Raises KeyError
         name = zip_path.normpath(name)
-        return self.zf.read(name)
+        return self.zf.open(name)
 
     def _load_container(self):
         meta_inf = self.read_file('META-INF/container.xml')
@@ -1509,20 +1530,20 @@ def _load_manifest(self):
             if media_type == 'application/x-dtbncx+xml':
                 ei = EpubNcx(uid=r.get('id'), file_name=unquote(r.get('href')))
 
-                ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
+                ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.file_name)))
             elif media_type == 'application/smil+xml':
                 ei = EpubSMIL(uid=r.get('id'), file_name=unquote(r.get('href')))
 
-                ei.content = self.read_file(zip_path.join(self.opf_dir, ei.file_name))
+                ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.file_name)))
             elif media_type == 'application/xhtml+xml':
                 if 'nav' in properties:
                     ei = EpubNav(uid=r.get('id'), file_name=unquote(r.get('href')))
 
-                    ei.content = self.read_file(zip_path.join(self.opf_dir, r.get('href')))
+                    ei.set_content(self.read_file(zip_path.join(self.opf_dir, r.get('href'))))
                 elif 'cover' in properties:
                     ei = EpubCoverHtml()
 
-                    ei.content = self.read_file(zip_path.join(self.opf_dir, unquote(r.get('href'))))
+                    ei.set_content(self.read_file(zip_path.join(self.opf_dir, unquote(r.get('href')))))
                 else:
                     ei = EpubHtml()
 
@@ -1531,21 +1552,21 @@ def _load_manifest(self):
                     ei.media_type = media_type
                     ei.media_overlay = r.get('media-overlay', None)
                     ei.media_duration = r.get('duration', None)
-                    ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
+                    ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))
                     ei.properties = properties
             elif media_type in IMAGE_MEDIA_TYPES:
                 if 'cover-image' in properties:
                     ei = EpubCover(uid=r.get('id'), file_name=unquote(r.get('href')))
 
                     ei.media_type = media_type
-                    ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
+                    ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))
                 else:
                     ei = EpubImage()
 
                     ei.id = r.get('id')
                     ei.file_name = unquote(r.get('href'))
                     ei.media_type = media_type
-                    ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
+                    ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))
             else:
                 # different types
                 ei = EpubItem()
@@ -1554,7 +1575,7 @@ def _load_manifest(self):
                 ei.file_name = unquote(r.get('href'))
                 ei.media_type = media_type
 
-                ei.content = self.read_file(zip_path.join(self.opf_dir, ei.get_name()))
+                ei.set_content(self.read_file(zip_path.join(self.opf_dir, ei.get_name())))
 
             self.book.add_item(ei)
 
@@ -1693,7 +1714,7 @@ def _load_opf_file(self):
                     navtype='toc'
                 )
             self._parse_nav(
-                nav_item.content,
+                nav_item.get_content(),
                 zip_path.dirname(nav_item.file_name),
                 navtype='pages'
             )
@@ -1706,7 +1727,8 @@ class Directory:
                 def read(self, subname):
                     with open(os.path.join(file_name, subname), 'rb') as fp:
                         return fp.read()
-
+                def open(self, subname):
+                    return open(os.path.join(file_name, subname), 'rb')
                 def close(self):
                     pass
 

diff --git a/samples/01_basic_create/create.py b/samples/01_basic_create/create.py
@@ -15,11 +15,11 @@
 
     # intro chapter
     c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en')
-    c1.content=u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>'
+    c1.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')
 
     # about chapter
     c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml')
-    c2.content='<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>'
+    c2.set_content('<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>')
 
     # add chapters to the book
     book.add_item(c1)

diff --git a/samples/02_cover_create/create.py b/samples/02_cover_create/create.py
@@ -18,11 +18,11 @@
 
     # intro chapter
     c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='hr')
-    c1.content=u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>'
+    c1.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')
 
     # about chapter
     c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml')
-    c2.content='<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p><p><img src="image.jpg" alt="Cover Image"/></p>'
+    c2.set_content('<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p><p><img src="image.jpg" alt="Cover Image"/></p>')
 
     # add chapters to the book
     book.add_item(c1)

diff --git a/samples/03_advanced_create/create.py b/samples/03_advanced_create/create.py
@@ -15,7 +15,7 @@
 
     # intro chapter
     c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='hr')
-    c1.content=u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>'
+    c1.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')
 
     # defube style
     style = '''BODY { text-align: justify;}'''
@@ -26,7 +26,7 @@
 
     # about chapter
     c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml')
-    c2.content='<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>'
+    c2.write('<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>')
     c2.set_language('hr')
     c2.properties.append('rendition:layout-pre-paginated rendition:orientation-landscape rendition:spread-none')
     c2.add_item(default_css)

diff --git a/samples/04_markdown_parse/epub2markdown.py b/samples/04_markdown_parse/epub2markdown.py
@@ -20,11 +20,11 @@
             proc = subprocess.Popen(['pandoc', '-f', 'html', '-t', 'markdown', '-'],
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE)
-            content, error = proc.communicate(item.content)
+            content, error = proc.communicate(item.get_content())
             file_name = os.path.splitext(item.file_name)[0] + '.md'
         else:
             file_name = item.file_name
-            content = item.content
+            content = item.get_content()
 
         # create needed directories 
         dir_name = '{0}/{1}'.format(base_name, os.path.dirname(file_name))

diff --git a/samples/07_pagebreaks/create.py b/samples/07_pagebreaks/create.py
@@ -17,22 +17,22 @@
 
     # build the chapter HTML and add the page break
     c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en')
-    c1.content = u'<h1>Introduction</h1><p><span id="page1" epub:type="pagebreak">1</span>This chapter has a visible page number.</p><p><span id="page2" epub:type="pagebreak">2</span>Something else now.</p>'
+    c1.set_content(u'<h1>Introduction</h1><p><span id="page1" epub:type="pagebreak">1</span>This chapter has a visible page number.</p><p><span id="page2" epub:type="pagebreak">2</span>Something else now.</p>')
 
     c2 = epub.EpubHtml(title='Chapter the Second', file_name='chap02.xhtml', lang='en')
-    c2.content = u'<html><head></head><body><h1>Chapter the Second</h1><p>This chapter has two page breaks, both with invisible page numbers.</p>'
+    c2.set_content(u'<html><head></head><body><h1>Chapter the Second</h1><p>This chapter has two page breaks, both with invisible page numbers.</p>')
 
     # Add invisible page numbers that match the printed text, for accessibility
-    c2.content += create_pagebreak("2")
+    c2.write(create_pagebreak("2"))
 
     # You can add more content  after the page break
-    c2.content += u'<p>This is the second page in the second chapter, after the invisible page break.</p>'
+    c2.write(u'<p>This is the second page in the second chapter, after the invisible page break.</p>')
 
     # Add invisible page numbers that match the printed text, for accessibility
-    c2.content += create_pagebreak("3", label="Page 3")
+    c2.write(create_pagebreak("3", label="Page 3"))
 
     # close the chapter
-    c2.content += u'</body></html>'
+    c2.write(u'</body></html>')
 
     # add chapters to the book
     book.add_item(c1)

diff --git a/samples/10_large_files/README.md b/samples/10_large_files/README.md
@@ -0,0 +1,8 @@
+Large create
+============
+
+Creates an EPUB file with a large chapter whose content is written to a temporary file instead of an in-memory buffer.
+
+## Start
+
+    python create.py
diff --git a/samples/10_large_files/create.py b/samples/10_large_files/create.py
@@ -0,0 +1,87 @@
+# coding=utf-8
+
+from ebooklib import epub
+from tempfile import TemporaryFile
+
+
+if __name__ == '__main__':
+    book = epub.EpubBook()
+
+    # add metadata
+    book.set_identifier('sample123456')
+    book.set_title('Sample book')
+    book.set_language('en')
+
+    book.add_author('Aleksandar Erkalovic')
+
+    # intro chapter
+    c1 = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang='en')
+    c1.set_content(u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph where i explain what is happening.</p></body></html>')
+
+    # about chapter: its content is backed by a temporary file instead of an in-memory buffer
+    c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml', content=TemporaryFile())
+    c2.write('<h1>About this book</h1>')
+    for i in range(1024):
+        c2.write('<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin rutrum erat ipsum, at fringilla sem sodales ut. Donec rutrum condimentum leo, non convallis ipsum sodales vel. Sed vulputate, quam dapibus pharetra viverra, nunc magna fermentum ligula, sed placerat enim diam id nisi. Sed justo nunc, placerat vel rutrum eget, lacinia quis ante. Maecenas semper turpis lectus, sed sollicitudin diam feugiat vitae. Mauris massa felis, cursus non enim a, consequat pulvinar mauris. Proin scelerisque neque felis, in fringilla ligula tristique ac. Phasellus interdum lacus neque, ac efficitur nibh consequat a. Donec enim enim, commodo sed finibus in, dignissim sit amet arcu. Nam mollis eu ipsum sed ornare. Maecenas non ipsum molestie, volutpat nulla quis, accumsan arcu. Cras imperdiet augue interdum ipsum laoreet malesuada vitae consectetur ipsum. Quisque vitae tortor augue.</p>')
+
+    # add chapters to the book
+    book.add_item(c1)
+    book.add_item(c2)
+
+    # create table of contents
+    # - add section
+    # - add auto created links to chapters
+
+    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'),
+                 (epub.Section('Languages'),
+                 (c1, c2))
+                )
+
+    # add navigation files
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    # define css style
+    style = '''
+@namespace epub "http://www.idpf.org/2007/ops";
+
+body {
+    font-family: Cambria, Liberation Serif, Bitstream Vera Serif, Georgia, Times, Times New Roman, serif;
+}
+
+h2 {
+     text-align: left;
+     text-transform: uppercase;
+     font-weight: 200;     
+}
+
+ol {
+        list-style-type: none;
+}
+
+ol > li:first-child {
+        margin-top: 0.3em;
+}
+
+
+nav[epub|type~='toc'] > ol > li > ol  {
+    list-style-type:square;
+}
+
+
+nav[epub|type~='toc'] > ol > li > ol > li {
+        margin-top: 0.3em;
+}
+
+'''
+
+    # add css file
+    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
+    book.add_item(nav_css)
+
+    # create spine
+    book.spine = ['nav', c1, c2]
+
+    # create epub file
+    epub.write_epub('test.epub', book, {})
+