merged html log input fix

This commit is contained in:
ldolse 2010-08-28 14:43:41 +10:00
commit 132df9b6c8
2 changed files with 58 additions and 0 deletions

View File

@ -0,0 +1,55 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """News recipe for West Hawaii Today (westhawaiitoday.com).

    Declares the RSS feeds to download and rewrites every article link to
    the site's printer-friendly ``.prt`` page.
    """

    # Publication metadata.
    title = 'West Hawaii Today'
    language = 'en'
    __author__ = 'Tony Stegall'
    description = 'Westhawaiitoday.com'
    publisher = 'West Hawaii '
    category = 'news,Hawaii,USA'

    # Download limits and clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif'

    # Each entry is a (title, url) pair.  Keep the comma between the two
    # strings: without it Python silently concatenates the adjacent literals
    # into a single string and the entry is no longer a pair.
    feeds = [
        ('Local News', 'http://www.westhawaiitoday.com/?rss=local/'),
        ('Local Sports', 'http://www.westhawaiitoday.com/?rss=sports/local_sports/'),
        ('Big Fish List', 'http://www.westhawaiitoday.com/?rss=sports/big_fish_list/'),
        ('Local Features', 'http://www.westhawaiitoday.com/?rss=features/'),
        ('Obituaries', 'http://www.westhawaiitoday.com/?rss=obituaries/'),
        ('Letters To Editor', 'http://www.westhawaiitoday.com/?rss=opinion/letters_-_your_voice/'),
        ('Editorial', 'http://www.westhawaiitoday.com/?rss=opinion/editorial/'),
        ('Columns', 'http://www.westhawaiitoday.com/?rss=opinion/columns/'),
        ('Volcano Update Sunday', 'http://www.westhawaiitoday.com/?rss=volcano/'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        The article id is the last path segment up to its first dot; the
        print page is that id with a ``.prt`` suffix in the same directory.

        Example of link to convert:
            Original link: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.txt
            Print version: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.prt

        If *url* has no final path segment to convert (no '/' at all, or it
        ends with '/'), it is returned unchanged instead of raising.
        """
        # Work from the last '/' rather than splitting on '//': article URLs
        # without a doubled slash in the path must not raise IndexError.
        directory, slash, filename = url.rpartition('/')
        artid = filename.split('.')[0]
        if not slash or not artid:
            return url
        # Single-argument print() parses on both Python 2 and Python 3 and
        # emits the same text as the original two-item print statement.
        print('ARTICLE ID IS:  %s' % artid)
        print_url = directory + '/' + artid + '.prt'
        print('print_url is:  %s' % print_url)
        return print_url
#test with ebook-convert hawaii.recipe output_dir --test -vv > myrecipe.txt

View File

@ -491,6 +491,9 @@ class HTMLInput(InputFormatPlugin):
return (None, raw) return (None, raw)
def preprocess_html(self, html): def preprocess_html(self, html):
if not hasattr(self, 'log'):
from calibre.utils.logging import default_log
self.log = default_log
self.log("********* Preprocessing HTML *********") self.log("********* Preprocessing HTML *********")
# Detect Chapters to match the xpath in the GUI # Detect Chapters to match the xpath in the GUI
chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE) chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE)