diff --git a/resources/recipes/hawaii.recipe b/resources/recipes/hawaii.recipe new file mode 100644 index 0000000000..30a2b26def --- /dev/null +++ b/resources/recipes/hawaii.recipe @@ -0,0 +1,55 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1282101454(BasicNewsRecipe): + title = 'West Hawaii Today' + language = 'en' + __author__ = 'Tony Stegall' + description = 'Westhawaiitoday.com' + publisher = 'West Hawaii ' + category = 'news,Hawaii,USA' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_javascript = True + + masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif' + + + + + + feeds = [ + ('Local News', 'http://www.westhawaiitoday.com/?rss=local/'), + ('Local Sports', 'http://www.westhawaiitoday.com/?rss=sports/local_sports/'), + ('Big Fish List', 'http://www.westhawaiitoday.com/?rss=sports/big_fish_list/'), + ('Local Features' 'http://www.westhawaiitoday.com/?rss=features/'), + ('Obituaries', 'http://www.westhawaiitoday.com/?rss=obituaries/'), + ('Letters To Editor', 'http://www.westhawaiitoday.com/?rss=opinion/letters_-_your_voice/'), + ('Editorial', 'http://www.westhawaiitoday.com/?rss=opinion/editorial/'), + ('Columns', 'http://www.westhawaiitoday.com/?rss=opinion/columns/'), + ('Volcano Update Sunday', 'http://www.westhawaiitoday.com/?rss=volcano/') + ] + + def print_version(self, url): + split1 = url.split("//") + url1 = split1[1] + xxx = split1[2] + split2 = xxx.split(".") + artid = split2[0] + print 'ARTICLE ID IS: ', artid + + #example of link to convert + #Original link: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.txt + #print version: http://www.westhawaiitoday.com/articles/2010/08/27/local//local01.prt + + print_url = 'http://' + url1 + '//' + artid + '.prt' + print 'print_url is: ', print_url + return print_url + + #test with ebook-convert hawaii.recipe output_dir --test -vv > myrecipe.txt + + + + + + diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 51c74228b7..d57bfddd3e 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -491,6 +491,9 @@ class HTMLInput(InputFormatPlugin): return (None, raw) def preprocess_html(self, html): + if not hasattr(self, 'log'): + from calibre.utils.logging import default_log + self.log = default_log self.log("********* Preprocessing HTML *********") # Detect Chapters to match the xpath in the GUI chapdetect = re.compile(r'(?=]*>)?\s*(?P(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(|)?)(]*>)', re.IGNORECASE)