merged html log input fix

2025-08-30 23:00:21 -04:00 · 2010-08-28 14:43:41 +10:00 · 2010-08-28 14:43:41 +10:00 · 132df9b6c8
commit 132df9b6c8
parent cdc5b4d771 1f922ddb5a
2 changed files with 58 additions and 0 deletions
--- a/resources/recipes/hawaii.recipe
+++ b/resources/recipes/hawaii.recipe
@ -0,0 +1,55 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'West Hawaii Today'
    language = 'en'
    __author__ = 'Tony Stegall'
    description = 'Westhawaiitoday.com'
    publisher = 'West Hawaii '
    category = 'news,Hawaii,USA'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif'
    feeds          = [
                       ('Local News',     'http://www.westhawaiitoday.com/?rss=local/'),
                       ('Local Sports',   'http://www.westhawaiitoday.com/?rss=sports/local_sports/'),
                       ('Big Fish List',  'http://www.westhawaiitoday.com/?rss=sports/big_fish_list/'),
                       ('Local Features'  'http://www.westhawaiitoday.com/?rss=features/'),
                       ('Obituaries',     'http://www.westhawaiitoday.com/?rss=obituaries/'),
                       ('Letters To Editor',  'http://www.westhawaiitoday.com/?rss=opinion/letters_-_your_voice/'),
                       ('Editorial',     'http://www.westhawaiitoday.com/?rss=opinion/editorial/'),
                       ('Columns',       'http://www.westhawaiitoday.com/?rss=opinion/columns/'),
                       ('Volcano Update Sunday',   'http://www.westhawaiitoday.com/?rss=volcano/')
                    ]
    def print_version(self, url):
        split1 = url.split("//")
        url1 = split1[1]
        xxx = split1[2]
        split2 = xxx.split(".")
        artid = split2[0]
        print 'ARTICLE ID IS: ', artid
        #example of link to convert
        #Original link: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.txt
        #print version: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.prt
        print_url = 'http://' + url1 + '//' + artid + '.prt'
        print 'print_url is: ', print_url
        return print_url
        #test with ebook-convert hawaii.recipe output_dir --test -vv > myrecipe.txt
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -491,6 +491,9 @@ class HTMLInput(InputFormatPlugin):
        return (None, raw)
 	def preprocess_html(self, html):
        if not hasattr(self, 'log'):
            from calibre.utils.logging import default_log
            self.log = default_log
 		self.log("*********  Preprocessing HTML  *********")
 		# Detect Chapters to match the xpath in the GUI
 		chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE)