diff --git a/newspaper/cleaners.py b/newspaper/cleaners.py index 47b6f1a89..610156f31 100644 --- a/newspaper/cleaners.py +++ b/newspaper/cleaners.py @@ -47,7 +47,7 @@ def __init__(self, config): .create("\n", "\n\n")\ .append("\t")\ .append("^\\s+$") - self.contains_article = './/article|.//*[@id="article"]|.//*[@itemprop="articleBody"]' + self.contains_article = './/article|.//*[@id="article"]|.//*[contains(@itemprop,"articleBody")]' def clean(self, doc_to_clean): """Remove chunks of the DOM as specified