diff --git a/wagtail_wordpress_import/block_builder.py b/wagtail_wordpress_import/block_builder.py
index 4f59e3e8..c180db1c 100644
--- a/wagtail_wordpress_import/block_builder.py
+++ b/wagtail_wordpress_import/block_builder.py
@@ -1,278 +1,106 @@
-import requests
from bs4 import BeautifulSoup
-from django.core.files import File
-from django.core.files.temp import NamedTemporaryFile
-from wagtail.images.models import Image as ImportedImage
+from django.conf import settings
+from django.utils.module_loading import import_string
-TAGS_TO_BLOCKS = [
- "table",
- "iframe",
- "form",
- "h1",
- "h2",
- "h3",
- "h4",
- "h5",
- "h6",
- "img",
- "blockquote",
-]
+from wagtail_wordpress_import.block_builder_defaults import (
+ conf_fallback_block,
+ conf_html_tags_to_blocks,
+)
-IRELLEVANT_PARENTS = ["p", "div", "span"]
-# this is not what i'd expect to see but some images return text/html CDN maybe?
-VALID_IMAGE_CONTENT_TYPES = [
- "image/gif",
- "image/jpeg",
- "image/png",
- "image/webp",
- "text/html",
-]
-
-IMAGE_SRC_DOMAIN = "https://www.budgetsaresexy.com" # note no trailing /
+def conf_promote_child_tags():
+ return getattr(
+ settings,
+ "WAGTAIL_WORDPRESS_IMPORTER_PROMOTE_CHILD_TAGS",
+ {
+ "TAGS_TO_PROMOTE": ["iframe", "form", "blockquote"],
+ "PARENTS_TO_REMOVE": ["p", "div", "span"],
+ },
+ )
class BlockBuilder:
def __init__(self, value, node, logger):
- self.soup = BeautifulSoup(value, "lxml", exclude_encodings=True)
- self.blocks = []
+ self.soup = BeautifulSoup(value, "lxml")
+ self.blocks = [] # for each page this holds the sequence of StreamBlocks
self.logged_items = {"processed": 0, "imported": 0, "skipped": 0, "items": []}
self.node = node
self.logger = logger
- self.set_up()
- def set_up(self):
+ def promote_child_tags(self):
"""
- iframes, forms can get put inside a p tag, pull them out
- extend this to add further tags
+ Some HTML tags that belong at the top level (i.e. whose parent is the body
+ when parsed with BS4) were placed inside, or already existed inside, other HTML tags.
+ We pull out these HTML tags and move them to the top level.
+ returns: None
+ but modifies the page soup
"""
- for iframe in self.soup.find_all("iframe"):
- parent = iframe.previous_element
- if parent.name in IRELLEVANT_PARENTS:
- parent.replaceWith(iframe)
+ config_promote_child_tags = conf_promote_child_tags()
+ promotee_tags = config_promote_child_tags["TAGS_TO_PROMOTE"]
+ removee_tags = config_promote_child_tags["PARENTS_TO_REMOVE"]
- for form in self.soup.find_all("form"):
- parent = form.previous_element
- if parent.name in IRELLEVANT_PARENTS:
- parent.replaceWith(form)
+ for promotee in promotee_tags:
+ promotees = self.soup.findAll(promotee)
+ for promotee in promotees:
+ if promotee.parent.name in removee_tags:
+ promotee.parent.replace_with(promotee)
- for blockquote in self.soup.find_all("blockquote"):
- parent = blockquote.previous_element
- if parent.name in IRELLEVANT_PARENTS:
- parent.replaceWith(blockquote)
+ def get_builder_function(self, element):
+ """
+ params
+ element: an HTML tag
+ returns:
+ a function to parse the block from configuration
+ """
+ function = [
+ import_string(builder[1]["FUNCTION"])
+ for builder in conf_html_tags_to_blocks()
+ if element.name == builder[0]
+ ]
+ if function:
+ return function[0]
def build(self):
+ """
+ params:
+ None
+ returns:
+ a list of block dicts
+
+ The value to be processed here should have only top level HTML tags.
+ The HTML is parsed to a sequence of StreamField blocks.
+ If an HTML tag does have child blocks we should parse them inside the
+ build_block_* method
+ """
soup = self.soup.find("body").findChildren(recursive=False)
- block_value = str("")
+ cached_fallback_value = (
+ "" # keep appending fall back content here, by default is Rich Text block
+ )
+ cached_fallback_function = import_string(
+ conf_fallback_block()
+ ) # Rich Text block
counter = 0
-
- for tag in soup:
+ for element in soup: # each single top level tag
counter += 1
- """
- the process here loops though each soup tag to discover
- the block type to use
- """
-
- # RICHTEXT
- if tag.name not in TAGS_TO_BLOCKS:
- block_value += str(self.image_linker(str(tag)))
-
- # TABLE
- if tag.name == "table":
- if len(block_value) > 0:
- self.blocks.append({"type": "rich_text", "value": block_value})
- block_value = str("")
- self.blocks.append({"type": "raw_html", "value": str(tag)})
-
- # IFRAME/EMBED
- if tag.name == "iframe":
- if len(block_value) > 0:
- self.blocks.append({"type": "rich_text", "value": block_value})
- block_value = str("")
- self.blocks.append(
- {
- "type": "raw_html",
- "value": '
{}
'.format(
- str(tag)
- ),
- }
+ # the builder function for the element tag from config
+ builder_function = self.get_builder_function(element)
+
+ if builder_function: # build a block
+ if cached_fallback_value:
+ cached_fallback_value = cached_fallback_function(
+ cached_fallback_value,
+ self.blocks,
+ ) # before building a block write fall back cache to a block
+ self.blocks.append(builder_function(element)) # write the new block
+ else:
+ if element.text.strip(): # exclude a tag that is empty
+ cached_fallback_value += str(element)
+
+ if cached_fallback_value and counter == len(
+ soup
+ ): # the last tag so just build whats left in the fall back cache
+ cached_fallback_value = cached_fallback_function(
+ cached_fallback_value, self.blocks
)
- # FORM
- if tag.name == "form":
- if len(block_value) > 0:
- self.blocks.append({"type": "rich_text", "value": block_value})
- block_value = str("")
- self.blocks.append({"type": "raw_html", "value": str(tag)})
-
- # HEADING
- if (
- tag.name == "h1"
- or tag.name == "h2"
- or tag.name == "h3"
- or tag.name == "h4"
- or tag.name == "h5"
- or tag.name == "h6"
- ):
- if len(block_value) > 0:
- self.blocks.append({"type": "rich_text", "value": block_value})
- block_value = str("")
- self.blocks.append(
- {
- "type": "heading",
- "value": {"importance": tag.name, "text": str(tag.text)},
- }
- )
-
- # IMAGE
- if tag.name == "img":
- if len(block_value) > 0:
- self.blocks.append({"type": "rich_text", "value": block_value})
- block_value = str("")
- self.blocks.append({"type": "raw_html", "value": str(tag)})
-
- # BLOCKQUOTE
- if tag.name == "blockquote":
- if len(block_value) > 0:
- self.blocks.append({"type": "rich_text", "value": block_value})
- block_value = str("")
- cite = ""
- if tag.attrs and tag.attrs.get("cite"):
- cite = str(tag.attrs["cite"])
- self.blocks.append(
- {
- "type": "block_quote",
- "value": {"quote": str(tag.text), "attribution": cite},
- }
- )
-
- if counter == len(soup) and len(block_value) > 0:
- # when we reach the end and something is in the
- # block_value just output and clear
- self.blocks.append({"type": "rich_text", "value": block_value})
- block_value = str("")
-
- # print(self.logged_items)
return self.blocks
-
- def image_linker(self, tag):
- soup = BeautifulSoup(tag, "html.parser", exclude_encodings=True)
- images = soup.find_all("img")
-
- for image in images:
- image_saved = self.get_image(image)
- if image_saved:
- alignment = self.get_alignment_class(image)
- img_alt = image.attrs["alt"] if "alt" in image.attrs else None
- tag = ''.format(
- image_saved.id, img_alt, alignment
- )
-
- return tag
-
- def get_alignment_class(self, image):
- alignment = "fullwidth"
-
- if "class" in image.attrs:
- if "align-left" in image.attrs["class"]:
- alignment = "left"
- elif "align-right" in image.attrs["class"]:
- alignment = "right"
-
- return alignment
-
- def get_image(self, image):
-
- if image.get("src"):
- name = image.get("src").split("/")[-1] # need the last part
- temp = NamedTemporaryFile(delete=True)
- image_src = check_image_src(image.get("src")).strip("/")
- else:
- self.logged_items["items"].append(
- {
- "id": self.node.get("wp:post_id"),
- "title": self.node.get("title"),
- "link": self.node.get("link"),
- "reason": "no src provided",
- }
- )
- self.logger.images.append(
- {
- "id": self.node.get("wp:post_id"),
- "title": self.node.get("title"),
- "link": self.node.get("link"),
- "reason": "no src provided",
- }
- )
- return
-
- try:
- image_exists = ImportedImage.objects.get(title=name)
- return image_exists
-
- except ImportedImage.DoesNotExist:
-
- try:
- response = requests.get(image_src, timeout=10)
- status_code = response.status_code
- content_type = response.headers.get("Content-Type")
-
- if (
- content_type
- and content_type.lower() not in VALID_IMAGE_CONTENT_TYPES
- ):
- self.logged_items["items"].append(
- {
- "id": self.node.get("wp:post_id"),
- "title": self.node.get("title"),
- "link": self.node.get("link"),
- "reason": "invalid image types match or no content type",
- }
- )
- self.logger.images.append(
- {
- "id": self.node.get("wp:post_id"),
- "title": self.node.get("title"),
- "link": self.node.get("link"),
- "reason": "invalid image types match or no content type",
- }
- )
- return
-
- if status_code == 200:
- temp.name = name
- temp.write(response.content)
- temp.flush()
- new_image = ImportedImage(file=File(file=temp), title=name)
- new_image.save()
- return new_image
-
- except requests.exceptions.ConnectionError:
- self.logged_items["items"].append(
- {
- "id": self.node.get("wp:post_id"),
- "title": self.node.get("title"),
- "link": self.node.get("link"),
- "reason": "connection error",
- }
- )
- self.logger.images.append(
- {
- "id": self.node.get("wp:post_id"),
- "title": self.node.get("title"),
- "link": self.node.get("link"),
- "reason": "connection error",
- }
- )
-
-
-def check_image_src(src):
- # some images have relative src values
- if not src.startswith("http"):
- print(
- "WARNING: relative file {}. Image may be broken, trying with domain name prepended. ".format(
- src
- )
- )
- return IMAGE_SRC_DOMAIN + "/" + src
- return src
diff --git a/wagtail_wordpress_import/block_builder_defaults.py b/wagtail_wordpress_import/block_builder_defaults.py
new file mode 100644
index 00000000..2fbbabbb
--- /dev/null
+++ b/wagtail_wordpress_import/block_builder_defaults.py
@@ -0,0 +1,290 @@
+import re
+
+import requests
+from bs4 import BeautifulSoup
+from django.conf import settings
+from django.core.files import File
+from django.core.files.temp import NamedTemporaryFile
+from wagtail.images.models import Image as ImportedImage
+
+"""StreamField blocks"""
+
+
+def build_block_quote_block(tag):
+ block_dict = {
+ "type": "block_quote",
+ "value": {"quote": tag.text.strip(), "attribution": tag.cite},
+ }
+ return block_dict
+
+
+def build_form_block(tag):
+ block_dict = {"type": "raw_html", "value": str(tag)}
+ return block_dict
+
+
+def build_heading_block(tag):
+ block_dict = {
+ "type": "heading",
+ "value": {"importance": tag.name, "text": tag.text},
+ }
+ return block_dict
+
+
+def build_iframe_block(tag):
+ block_dict = {
+ "type": "raw_html",
+ "value": '
{}
'.format(
+ str(tag)
+ ),
+ }
+ return block_dict
+
+
+def build_image_block(tag):
+ def get_image_id(src):
+ return 1
+
+ block_dict = {"type": "image", "value": get_image_id(tag.src)}
+ return block_dict
+
+
+def build_table_block(tag):
+ block_dict = {"type": "raw_html", "value": str(tag)}
+ return block_dict
+
+
+def conf_html_tags_to_blocks():
+ return getattr(
+ settings,
+ "WAGTAIL_WORDPRESS_IMPORTER_CONVERT_HTML_TAGS_TO_BLOCKS",
+ [
+ (
+ "h1",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_heading_block",
+ },
+ ),
+ (
+ "h2",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_heading_block",
+ },
+ ),
+ (
+ "h3",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_heading_block",
+ },
+ ),
+ (
+ "h4",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_heading_block",
+ },
+ ),
+ (
+ "h5",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_heading_block",
+ },
+ ),
+ (
+ "h6",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_heading_block",
+ },
+ ),
+ (
+ "table",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_table_block",
+ },
+ ),
+ (
+ "iframe",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_iframe_block",
+ },
+ ),
+ (
+ "form",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_form_block",
+ },
+ ),
+ (
+ "img",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_image_block",
+ },
+ ),
+ (
+ "blockquote",
+ {
+ "FUNCTION": "wagtail_wordpress_import.block_builder_defaults.build_block_quote_block",
+ },
+ ),
+ ],
+ )
+
+
+"""Fall back StreamField block"""
+
+
+def conf_fallback_block():
+ return getattr(
+ settings,
+ "WAGTAIL_WORDPRESS_IMPORTER_FALLBACK_BLOCK",
+ "wagtail_wordpress_import.block_builder_defaults.build_none_block_content",
+ )
+
+
+def build_none_block_content(cache, blocks):
+ """
+ image_linker is called to link up and retrieve the remote image
+ """
+ blocks.append({"type": "rich_text", "value": image_linker(cache)})
+ cache = ""
+ return cache
+
+
+"""Rich Text Functions"""
+
+
+def conf_valid_image_content_types():
+ return getattr(
+ settings,
+ "WAGTAIL_WORDPRESS_IMPORTER_VALID_IMAGE_CONTENT_TYPES",
+ [
+ "image/gif",
+ "image/jpeg",
+ "image/png",
+ "image/webp",
+ "text/html",
+ ],
+ )
+
+
+def conf_domain_prefix():
+
+ if hasattr(settings, "WAGTAIL_WORDPRESS_IMPORTER_BASE_URL"):
+ return getattr(settings, "WAGTAIL_WORDPRESS_IMPORTER_BASE_URL")
+
+ elif not hasattr(settings, "WAGTAIL_WORDPRESS_IMPORTER_BASE_URL") and hasattr(
+ settings, "BASE_URL"
+ ):
+ return getattr(settings, "BASE_URL")
+
+
+def image_linker(html):
+ """
+ params
+ ======
+ html: html from a single rich_text block
+
+ returns
+ =======
+ string: the html with img tags modified
+
+ BS4 performs a find and replace on all img tags found in the HTML.
+ If the image can be retrieved from the remote site and saved into a Wagtail ImageModel
+ the soup is modified.
+ """
+ soup = BeautifulSoup(html, "html.parser")
+ images = soup.find_all("img")
+ for image in images:
+ if image.attrs and image.attrs["src"]:
+ image_src = get_abolute_src(image.attrs["src"], conf_domain_prefix())
+ saved_image = get_or_save_image(image_src)
+ if saved_image:
+ image_embed = soup.new_tag("embed")
+ image_embed.attrs["embedtype"] = "image"
+ image_embed.attrs["id"] = saved_image.id
+ image_embed.attrs["alt"] = get_image_alt(image)
+ image_embed.attrs["format"] = get_alignment_class(image)
+ image.replace_with(image_embed)
+ else:
+ print(f"IMAGE HAS NO SRC: {image}")
+
+ return str(soup)
+
+
+def get_image_alt(img_tag):
+ return img_tag.attrs["alt"] if "alt" in img_tag.attrs else None
+
+
+def get_image_file_name(src):
+ return src.split("/")[-1] if src else None # need the last part
+
+
+def image_exists(name):
+ try:
+ return ImportedImage.objects.get(title=name)
+ except ImportedImage.DoesNotExist:
+ pass
+
+
+def conf_get_requests_settings():
+ return getattr(
+ settings,
+ "WAGTAIL_WORDPRESS_IMPORTER_REQUESTS_SETTINGS",
+ {
+ "headers": {"User-Agent": "WagtailWordpressImporter"},
+ "timeout": 1,
+ "stream": False,
+ },
+ )
+
+
+def get_or_save_image(src):
+ image_file_name = get_image_file_name(src)
+ existing_image = image_exists(image_file_name)
+ if not existing_image:
+ response, valid, type = fetch_url(src)
+ if valid and (type in conf_valid_image_content_types()):
+ temp_image = NamedTemporaryFile(delete=True)
+ temp_image.name = image_file_name
+ temp_image.write(response.content)
+ temp_image.flush()
+ retrieved_image = ImportedImage(
+ file=File(file=temp_image), title=image_file_name
+ )
+ retrieved_image.save()
+ temp_image.close()
+ return retrieved_image
+ else:
+ print(f"RECEIVED INVALID RESPONSE: {src}")
+ return existing_image
+
+
+def fetch_url(src, r=None, status=False, content_type=None):
+ """general purpose url fetcher with ability to pass in own config"""
+ try:
+ r = requests.get(src, **conf_get_requests_settings())
+ status = r.status_code == 200
+ content_type = (
+ r.headers["content-type"].lower() if r.headers.get("content-type") else ""
+ )
+ except requests.ConnectTimeout:
+ print(f"THERE WAS A PROBLEM WITH REQUESTS FETCHING: {src}")
+ return r, status, content_type
+
+
+def get_abolute_src(src, domain_prefix=None):
+ src = re.sub("^\/+", "", src)
+ if not src.startswith("http") and domain_prefix:
+ return domain_prefix + "/" + src
+ return src
+
+
+def get_alignment_class(image):
+ alignment = "fullwidth"
+
+ if "class" in image.attrs:
+ if "align-left" in image.attrs["class"]:
+ alignment = "left"
+ elif "align-right" in image.attrs["class"]:
+ alignment = "right"
+
+ return alignment
diff --git a/wagtail_wordpress_import/importers/wordpress.py b/wagtail_wordpress_import/importers/wordpress.py
index 320fc188..97b74977 100644
--- a/wagtail_wordpress_import/importers/wordpress.py
+++ b/wagtail_wordpress_import/importers/wordpress.py
@@ -52,8 +52,10 @@ def run(self, *args, **kwargs):
exit()
for event, node in xml_doc:
- # each node represents a tag in the xml
- # event is true for the start element
+ """
+ Each node represents a tag in the xml.
+ `event` is true for a start element.
+ """
if event == pulldom.START_ELEMENT and node.tagName == "item":
xml_doc.expandNode(node)
item = node_to_dict(node)
@@ -144,8 +146,10 @@ def analyze_html(self, html_analyzer, *, page_types, page_statuses):
xml_doc = pulldom.parse(self.xml_file)
for event, node in xml_doc:
- # each node represents a tag in the xml
- # event is true for the start element
+ """
+ Each node represents a tag in the xml.
+ `event` is true for a start element.
+ """
if event == pulldom.START_ELEMENT and node.tagName == "item":
xml_doc.expandNode(node)
item = node_to_dict(node)
@@ -217,10 +221,8 @@ def __init__(self, node, logger):
def prefilter_content(self, content):
"""
- FILTERS ARE CUMULATIVE
- cache the result of each filter which is run on the output from the previous filter
+ FILTERS ARE CUMULATIVE: Each filter receives the output from the previous filter.
"""
-
cached_result = content
for filter in default_prefilters():
@@ -236,11 +238,11 @@ def cleaned_title(self):
def cleaned_slug(self):
"""
- Oddly some page have no slug and some have illegal characters!
- If None make one from title.
- Also pass any slug through slugify to be sure and if it's changed make a note
+ Clean up the slugs from the XML import file
+ Some pages have no slug and some have unexpected characters.
+ If a slug is not provided create one from page title.
+ If a slug is changed, the change is recorded in the logger.
"""
-
if not self.node["wp:post_name"]:
slug = slugify(self.cleaned_title())
self.slug_changed = "blank slug" # logging
@@ -264,9 +266,9 @@ def cleaned_latest_revision_created_at(self):
def clean_date(self, value):
"""
- We need a nice date to be able to save the page later. Some dates are not suitable
- date strings in the xml. If thats the case return a specific date so it can be saved
- and return the failure for logging
+ We need a proper date format.
+ Some dates are not suitable date strings in the xml, if so return a
+ specific date so it can be saved in Wagtail and record it in the logger.
"""
if value == "0000-00-00 00:00:00":
@@ -289,7 +291,9 @@ def cleaned_link(self):
return str(self.node["link"].strip())
def body_stream_field(self, content):
- blocks_dict = BlockBuilder(content, self.node, self.logger).build()
+ builder = BlockBuilder(content, self.node, self.logger)
+ builder.promote_child_tags()
+ blocks_dict = builder.build()
if debug_enabled():
self.debug_content["block_json"] = blocks_dict
return json.dumps(blocks_dict)
@@ -301,7 +305,7 @@ def get_yoast_description_value(self):
This parses the wp:postmeta field to check if a _yoast_wpseo_metadesc
is available. If not it returns a blank string or the default description field
- in the XML ... .
+ from the XML import file ...
"""
meta_value = ""
@@ -335,8 +339,7 @@ def cleaned_data(self):
which imports to a standard Wagtail field.
This came out of dealing with the Yoast search_description field which we have
- included and can be configured by a developer to accept different values as
- the wp:postmeta keys
+ included and can be configured to accept different values that are in wp:postmeta keys
"""
return {
"title": self.cleaned_title(),
diff --git a/wagtail_wordpress_import/management/commands/import_xml.py b/wagtail_wordpress_import/management/commands/import_xml.py
index db896c95..76cc6207 100644
--- a/wagtail_wordpress_import/management/commands/import_xml.py
+++ b/wagtail_wordpress_import/management/commands/import_xml.py
@@ -3,6 +3,7 @@
from django.core.management.base import BaseCommand
from wagtail_wordpress_import.importers.wordpress import WordpressImporter
from wagtail_wordpress_import.logger import Logger
+from wagtail_wordpress_import.block_builder_defaults import conf_domain_prefix
LOG_DIR = "log"
@@ -56,6 +57,13 @@ def add_arguments(self, parser):
)
def handle(self, **options):
+ if not conf_domain_prefix():
+ self.stdout.write(
+ self.style.ERROR(
+ "BASE_URL or WAGTAIL_WORDPRESS_IMPORTER_BASE_URL: needs to be added to your settings"
+ )
+ )
+ exit()
xml_file_path = self.get_xml_file(f"{options['xml_file']}")
logger = Logger(LOG_DIR)
importer = WordpressImporter(xml_file_path)
diff --git a/wagtail_wordpress_import/prefilters/transform_styles_defaults.py b/wagtail_wordpress_import/prefilters/transform_styles_defaults.py
index 142b89e8..a075376c 100644
--- a/wagtail_wordpress_import/prefilters/transform_styles_defaults.py
+++ b/wagtail_wordpress_import/prefilters/transform_styles_defaults.py
@@ -43,40 +43,50 @@ def transform_style_center(soup, tag):
"""
apply a new css class to any existing classes
"""
- _class = tag.get("class", "") + " align-center"
- tag.attrs["class"] = _class.strip()
+ if tag.attrs.get("class"):
+ tag.attrs["class"].append("align-center")
+ else:
+ tag.attrs["class"] = "align-center"
def transform_style_left(soup, tag):
"""
apply a new css class to any existing classes
"""
- _class = tag.get("class", "") + " align-left"
- tag.attrs["class"] = _class.strip()
+ if tag.attrs.get("class"):
+ tag.attrs["class"].append("align-left")
+ else:
+ tag.attrs["class"] = "align-left"
def transform_style_right(soup, tag):
"""
apply a new css class to any existing classes
"""
- _class = tag.get("class", "") + " align-right"
- tag.attrs["class"] = _class.strip()
+ if tag.attrs.get("class"):
+ tag.attrs["class"].append("align-right")
+ else:
+ tag.attrs["class"] = "align-right"
def transform_float_left(soup, tag):
"""
apply a new css class to any existing classes
"""
- _class = tag.get("class", "") + " float-left"
- tag.attrs["class"] = _class.strip()
+ if tag.attrs.get("class"):
+ tag.attrs["class"].append("float-left")
+ else:
+ tag.attrs["class"] = "float-left"
def transform_float_right(soup, tag):
"""
apply a new css class to any existing classes
"""
- _class = tag.get("class", "") + " float-right"
- tag.attrs["class"] = _class.strip()
+ if tag.attrs.get("class"):
+ tag.attrs["class"].append("float-right")
+ else:
+ tag.attrs["class"] = "float-right"
def conf_styles_mapping():
diff --git a/wagtail_wordpress_import/templates/wagtail_wordpress_import/quote_block.html b/wagtail_wordpress_import/templates/wagtail_wordpress_import/quote_block.html
index 33bea5ac..9540a522 100644
--- a/wagtail_wordpress_import/templates/wagtail_wordpress_import/quote_block.html
+++ b/wagtail_wordpress_import/templates/wagtail_wordpress_import/quote_block.html
@@ -1,4 +1,3 @@
-
-
{{ value.quote }}
- {% if value.attribution %}{{ value.attribution }}{% endif %}
+
+ {{ value.quote }}
diff --git a/wagtail_wordpress_import/test/fixtures/raw_html.txt b/wagtail_wordpress_import/test/fixtures/raw_html.txt
index fe36583c..fdb17726 100644
--- a/wagtail_wordpress_import/test/fixtures/raw_html.txt
+++ b/wagtail_wordpress_import/test/fixtures/raw_html.txt
@@ -1,9 +1,18 @@
+
+
Lorem ipsum (xcounterx) dolor sit ametLorem ipsum dolor sit (xcounterx) amet!
Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua.?
-
-
-
-
Item
-
Amount
-
-
-
-
-
TOTAL:
-
$1,127.67
-
-
-
-
-
Lorem 1
-
Lorem 1/1
-
-
-
Lorem 2
-
Lorem 2/1
-
-
+
Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore
+ magna aliqua.?
+
+
+
+
+
Item
+
Amount
+
+
+
+
+
TOTAL:
+
$1,127.67
+
+
+
+
+
Lorem 1
+
Lorem 1/1
+
+
+
Lorem 2
+
Lorem 2/1
+
+
-
-
Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua.
-Nihil hic munitissimus habendi senatus locus, nihil horum?.
+
+
+
+
+
+
+
+
+
+
+
+
Lorem ipsum dolor sit amet, consectetur adipisici
+ elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua.
+
+
Lorem ipsum dolor sit amet, consectetur
+ adipisici
+ elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua.