From 97b8daee41ce47c385000003075107fb4f16fe3a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 22 Feb 2015 14:57:19 +0530 Subject: [PATCH] Get rid of cssselect from readability --- src/calibre/ebooks/readability/htmls.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/readability/htmls.py b/src/calibre/ebooks/readability/htmls.py index b30196b3b2..d528c86a0e 100644 --- a/src/calibre/ebooks/readability/htmls.py +++ b/src/calibre/ebooks/readability/htmls.py @@ -67,12 +67,19 @@ def shorten_title(doc): if e.text_content(): add_match(candidates, e.text_content(), orig) - from cssselect import HTMLTranslator - css_to_xpath = HTMLTranslator().css_to_xpath - for item in ('#title', '#head', '#heading', '.pageTitle', '.news_title', - '.title', '.head', '.heading', '.contentheading', - '.small_header_red'): - for e in doc.xpath(css_to_xpath(item)): + for item in [ + "descendant-or-self::*[@id = 'title']", + "descendant-or-self::*[@id = 'head']", + "descendant-or-self::*[@id = 'heading']", + "descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' pageTitle ')]", + "descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' news_title ')]", + "descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' title ')]", + "descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' head ')]", + "descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' heading ')]", + "descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' contentheading ')]", + "descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' small_header_red ')]" + ]: + for e in doc.xpath(item): if e.text: add_match(candidates, e.text, orig) if e.text_content():