mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
merged html log input fix
This commit is contained in:
commit
132df9b6c8
55
resources/recipes/hawaii.recipe
Normal file
55
resources/recipes/hawaii.recipe
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """News recipe for West Hawaii Today (westhawaiitoday.com).

    Declares the RSS feeds to download and rewrites every article link to
    its printer-friendly ('.prt') counterpart via ``print_version``.
    """

    # Metadata describing the publication.
    title = 'West Hawaii Today'
    language = 'en'
    __author__ = 'Tony Stegall'
    description = 'Westhawaiitoday.com'
    publisher = 'West Hawaii '
    category = 'news,Hawaii,USA'

    # Download / clean-up settings.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif'

    # Each entry must be a (title, url) tuple.  The 'Local Features' entry
    # previously lacked the comma between title and URL, which made Python
    # concatenate the two literals into a single string.
    feeds = [
        ('Local News', 'http://www.westhawaiitoday.com/?rss=local/'),
        ('Local Sports', 'http://www.westhawaiitoday.com/?rss=sports/local_sports/'),
        ('Big Fish List', 'http://www.westhawaiitoday.com/?rss=sports/big_fish_list/'),
        ('Local Features', 'http://www.westhawaiitoday.com/?rss=features/'),
        ('Obituaries', 'http://www.westhawaiitoday.com/?rss=obituaries/'),
        ('Letters To Editor', 'http://www.westhawaiitoday.com/?rss=opinion/letters_-_your_voice/'),
        ('Editorial', 'http://www.westhawaiitoday.com/?rss=opinion/editorial/'),
        ('Columns', 'http://www.westhawaiitoday.com/?rss=opinion/columns/'),
        ('Volcano Update Sunday', 'http://www.westhawaiitoday.com/?rss=volcano/'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        Article links contain a second '//' right before the article id,
        and the print page uses the same path with a '.prt' extension:

            Original link: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.txt
            print version: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.prt

        A URL that does not have this shape (no second '//') is returned
        unchanged instead of raising IndexError.
        """
        # For a well-formed link this yields:
        #   ['http:', '<host>/<path>', '<article id>.<ext>', ...]
        pieces = url.split("//")
        if len(pieces) < 3:
            return url

        host_and_path = pieces[1]
        # Drop the extension (everything from the first '.') to get the id.
        artid = pieces[2].split(".")[0]
        # Single-argument print(...) emits the same text as the former
        # Python 2 print statements and is also valid on Python 3.
        print('ARTICLE ID IS:  %s' % (artid,))

        print_url = 'http://' + host_and_path + '//' + artid + '.prt'
        print('print_url is:  %s' % (print_url,))
        return print_url
|
||||||
|
|
||||||
|
#test with ebook-convert hawaii.recipe output_dir --test -vv > myrecipe.txt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -491,6 +491,9 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
return (None, raw)
|
return (None, raw)
|
||||||
|
|
||||||
def preprocess_html(self, html):
|
def preprocess_html(self, html):
|
||||||
|
if not hasattr(self, 'log'):
|
||||||
|
from calibre.utils.logging import default_log
|
||||||
|
self.log = default_log
|
||||||
self.log("********* Preprocessing HTML *********")
|
self.log("********* Preprocessing HTML *********")
|
||||||
# Detect Chapters to match the xpath in the GUI
|
# Detect Chapters to match the xpath in the GUI
|
||||||
chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE)
|
chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user