mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
merged html log input fix
This commit is contained in:
commit
132df9b6c8
55
resources/recipes/hawaii.recipe
Normal file
55
resources/recipes/hawaii.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe for West Hawaii Today (westhawaiitoday.com).

    Declares the site's RSS feeds and maps every article link to its
    printer-friendly ``.prt`` variant via :meth:`print_version`.
    """

    title = 'West Hawaii Today'
    language = 'en'
    __author__ = 'Tony Stegall'
    description = 'Westhawaiitoday.com'
    publisher = 'West Hawaii '
    category = 'news,Hawaii,USA'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif'

    # Each entry is a (feed title, feed URL) pair.
    feeds = [
        ('Local News', 'http://www.westhawaiitoday.com/?rss=local/'),
        ('Local Sports', 'http://www.westhawaiitoday.com/?rss=sports/local_sports/'),
        ('Big Fish List', 'http://www.westhawaiitoday.com/?rss=sports/big_fish_list/'),
        # The comma between title and URL was missing here, which made Python
        # concatenate the two literals into one bogus string instead of a
        # (title, url) tuple.
        ('Local Features', 'http://www.westhawaiitoday.com/?rss=features/'),
        ('Obituaries', 'http://www.westhawaiitoday.com/?rss=obituaries/'),
        ('Letters To Editor', 'http://www.westhawaiitoday.com/?rss=opinion/letters_-_your_voice/'),
        ('Editorial', 'http://www.westhawaiitoday.com/?rss=opinion/editorial/'),
        ('Columns', 'http://www.westhawaiitoday.com/?rss=opinion/columns/'),
        ('Volcano Update Sunday', 'http://www.westhawaiitoday.com/?rss=volcano/'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        Example of a link to convert:
            original: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.txt
            print:    http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.prt

        ``url`` must contain a second ``//`` separating the section path
        from the article file name; otherwise ``IndexError`` is raised.
        """
        # 'http://host/path//file.ext' -> ['http:', 'host/path', 'file.ext']
        pieces = url.split("//")
        url1 = pieces[1]
        # Article id is the file name up to its first dot.
        artid = pieces[2].split(".")[0]
        # Single-argument print(...) is valid on both Python 2 and 3; the
        # doubled space reproduces the output of the former
        # "print 'label: ', value" statement exactly.
        print('ARTICLE ID IS:  ' + artid)

        print_url = 'http://' + url1 + '//' + artid + '.prt'
        print('print_url is:  ' + print_url)
        return print_url
|
||||
|
||||
#test with ebook-convert hawaii.recipe output_dir --test -vv > myrecipe.txt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -491,6 +491,9 @@ class HTMLInput(InputFormatPlugin):
|
||||
return (None, raw)
|
||||
|
||||
def preprocess_html(self, html):
|
||||
if not hasattr(self, 'log'):
|
||||
from calibre.utils.logging import default_log
|
||||
self.log = default_log
|
||||
self.log("********* Preprocessing HTML *********")
|
||||
# Detect Chapters to match the xpath in the GUI
|
||||
chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE)
|
||||
|
Loading…
x
Reference in New Issue
Block a user