Patch summary (several independent calibre changes bundled in one diff):

  * resources/recipes/nightflier.recipe  - new recipe for the "Nightflier's
    Bookspace" blog feed.
  * resources/recipes/the_age.recipe     - collect articles per section, emit
    a preferred section order first, add a front-page PDF cover lookup and a
    paragraph clean-up pass.
  * resources/recipes/the_oz.recipe      - raise max_articles_per_feed to 30
    and reorder/extend the feed list.
  * resources/recipes/wikinews_en.recipe - add a fixed cover image URL.
  * src/calibre/customize/builtins.py,
    src/calibre/devices/misc.py          - add and register the PDNOVEL_KOBO
    device driver.
  * src/calibre/ebooks/mobi/mobiml.py    - stop nesting blockquotes once the
    remaining indent is at most half an em step.
  * src/calibre/library/cli.py           - use prints() for the
    --really-do-it hint.

NOTE(review): a few -/+ pairs below carry identical text; presumably they
differ only in alignment whitespace - confirm against the repository.

diff --git a/resources/recipes/nightflier.recipe b/resources/recipes/nightflier.recipe
new file mode 100644
index 0000000000..5fc428cc76
--- /dev/null
+++ b/resources/recipes/nightflier.recipe
@@ -0,0 +1,46 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+nightfliersbookspace.blogspot.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NightfliersBookspace(BasicNewsRecipe):
+    title = "Nightflier's Bookspace"
+    __author__ = 'Darko Miletic'
+    description = 'SF, Fantasy, Books, Knjige'
+    oldest_article = 35
+    max_articles_per_feed = 100
+    language = 'sr'
+    encoding = 'utf-8'
+    no_stylesheets = True
+    use_embedded_content = True
+    publication_type = 'blog'
+    cover_url = ''
+    extra_css = """
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+        body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif}
+        .article_description{font-family: sans1, sans-serif}
+        img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
+    """
+
+    conversion_options = {
+          'comment' : description
+        , 'tags' : 'SF, fantasy, prevod, blog, Srbija'
+        , 'publisher': 'Ivan Jovanovic'
+        , 'language' : language
+    }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    feeds = [(u'Posts', u'http://nightfliersbookspace.blogspot.com/feeds/posts/default')]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
+
+
diff --git a/resources/recipes/the_age.recipe b/resources/recipes/the_age.recipe
index 8e4ae05575..eddb5e5000 100644
--- a/resources/recipes/the_age.recipe
+++ b/resources/recipes/the_age.recipe
@@ -9,15 +9,19 @@ theage.com.au
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-
+import re
 
 class TheAge(BasicNewsRecipe):
 
-    title = 'The Age'
-    description = 'Business News, World News and Breaking News in Melbourne, Australia'
-    __author__ = 'Matthew Briggs'
-    language = 'en_AU'
-
+    title = 'The Age'
+    description = 'Business News, World News and Breaking News in Melbourne, Australia'
+    publication_type = 'newspaper'
+    __author__ = 'Matthew Briggs'
+    language = 'en_AU'
+
+    max_articles_per_feed = 1000
+    recursions = 0
+    remove_tags = [dict(name=['table', 'script', 'noscript', 'style']), dict(name='a', attrs={'href':'/'}), dict(name='a', attrs={'href':'/text/'})]
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
@@ -28,30 +32,81 @@ class TheAge(BasicNewsRecipe):
 
         soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/text/').read())
 
-        feeds, articles = [], []
-        feed = None
-
+        section = None
+        sections = {}
         for tag in soup.findAll(['h3', 'a']):
             if tag.name == 'h3':
-                if articles:
-                    feeds.append((feed, articles))
-                    articles = []
-                feed = self.tag_to_string(tag)
-            elif feed is not None and tag.has_key('href') and tag['href'].strip():
+                section = self.tag_to_string(tag)
+                sections[section] = []
+
+            # Make sure to skip: TheAge
+
+            elif section and tag.has_key('href') and len(tag['href'].strip())>1:
                 url = tag['href'].strip()
                 if url.startswith('/'):
-                    url = 'http://www.theage.com.au' + url
+                    url = 'http://www.theage.com.au' + url
                 title = self.tag_to_string(tag)
-                articles.append({
+                sections[section].append({
                     'title': title,
                     'url' : url,
                     'date' : strftime('%a, %d %b'),
                     'description' : '',
                     'content' : '',
                 })
 
+
+        feeds = []
+        # Insert feeds in specified order, if available
+
+        feedSort = [ 'National', 'World', 'Opinion', 'Columns', 'Business', 'Sport', 'Entertainment' ]
+        for i in feedSort:
+            if i in sections:
+                feeds.append((i,sections[i]))
+
+        # Done with the sorted feeds
+
+        for i in feedSort:
+            sections.pop(i, None)  # a preferred section may be absent from the page; plain del would raise KeyError
+
+        # Append what is left over...
+
+        for i in sections:
+            feeds.append((i,sections[i]))
+
         return feeds
 
+    def get_cover_url(self):
+        soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/todays-paper').read())
+        for i in soup.findAll('a'):
+            href = i.get('href')  # anchors without an href must be skipped, not raise KeyError
+            if href and re.match('http://www.theage.com.au/frontpage/[0-9]+/[0-9]+/[0-9]+/frontpage.pdf',href):
+                return href
+
+        return None
+
+    def preprocess_html(self,soup):
+
+        for p in soup.findAll('p'):
+
+            # Collapse the paragraph by joining the non-tag contents
+
+            contents = [i for i in p.contents if isinstance(i,unicode)]
+            if len(contents):
+                contents = ''.join(contents)
+
+                # Filter out what's left of the text-mode navigation stuff
+
+                if re.match(r'((\s)|(\&nbsp\;))*\[[\|\s*]*\]((\s)|(\&nbsp\;))*$',contents):
+                    p.extract()
+                    continue
+
+                # Shrink the fine print font
+
+                if contents=='This material is subject to copyright and any unauthorised use, copying or mirroring is prohibited.':
+                    p['style'] = 'font-size:small'
+                    continue
+
+        return soup
 
 
 
diff --git a/resources/recipes/the_oz.recipe b/resources/recipes/the_oz.recipe
index ccdce0acb6..6a897589db 100644
--- a/resources/recipes/the_oz.recipe
+++ b/resources/recipes/the_oz.recipe
@@ -16,7 +16,7 @@ class DailyTelegraph(BasicNewsRecipe):
     language = 'en_AU'
 
     oldest_article = 2
-    max_articles_per_feed = 20
+    max_articles_per_feed = 30
     remove_javascript = True
     no_stylesheets = True
     encoding = 'utf8'
@@ -48,22 +48,24 @@ class DailyTelegraph(BasicNewsRecipe):
                 .caption{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; font-size: xx-small;}
                 '''
 
-    feeds = [(u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
+    feeds = [ (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
              (u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/aus_opinion_58.xml'),
-             (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
-             (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
-             (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
-             (u'The Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
-             (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
              (u'The Nation', u'http://feeds.news.com.au/public/rss/2.0/aus_the_nation_62.xml'),
-             (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
-             (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
-             (u'Defence', u'http://feeds.news.com.au/public/rss/2.0/aus_defence_54.xml'),
-             (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
-             (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml'),
+             (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),
+             (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml'),
              (u'Climate', u'http://feeds.news.com.au/public/rss/2.0/aus_climate_809.xml'),
+             (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
+             (u'IT', u'http://feeds.news.com.au/public/rss/2.0/ausit_itnews_topstories_367.xml'),
+             (u'Exec Tech', u'http://feeds.news.com.au/public/rss/2.0/ausit_exec_topstories_385.xml'),
+             (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
+             (u'Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
+             (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
              (u'Property', u'http://feeds.news.com.au/public/rss/2.0/aus_property_59.xml'),
-             (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml')]
+             (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
+             (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
+             (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
+             (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
+             (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
 
     def get_article_url(self, article):
         return article.id
diff --git a/resources/recipes/wikinews_en.recipe b/resources/recipes/wikinews_en.recipe
index cf83793702..538ab241c2 100644
--- a/resources/recipes/wikinews_en.recipe
+++ b/resources/recipes/wikinews_en.recipe
@@ -55,6 +55,9 @@ class WikiNews(BasicNewsRecipe):
         rest, sep, article_id = url.rpartition('/')
         return 'http://en.wikinews.org/w/index.php?title=' + article_id + '&printable=yes'
 
+    def get_cover_url(self):
+        return 'http://upload.wikimedia.org/wikipedia/commons/b/bd/Wikinews-logo-en.png'
+
     def preprocess_html(self, soup):
         mtag = ''
         soup.head.insert(0,mtag)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 0a10b79bd3..6fda73f785 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -461,7 +461,7 @@ from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 
@@ -574,6 +574,7 @@ plugins += [
     SPECTRA,
     GEMEI,
     VELOCITYMICRO,
+    PDNOVEL_KOBO,
     ITUNES,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py
index f98b6f7103..a1c9b790e4 100644
--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@@ -108,6 +108,16 @@ class PDNOVEL(USBMS):
         with open('%s.jpg' % os.path.join(path, filename), 'wb') as coverfile:
             coverfile.write(coverdata[2])
 
+class PDNOVEL_KOBO(PDNOVEL):
+    name = 'Pandigital Kobo device interface'
+    gui_name = 'PD Novel (Kobo)'
+    description = _('Communicate with the Pandigital Novel')
+
+    BCD = [0x222]
+
+    EBOOK_DIR_MAIN = 'eBooks/Kobo'
+
+
 class VELOCITYMICRO(USBMS):
     name = 'VelocityMicro device interface'
     gui_name = 'VelocityMicro'
diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index a822e66758..cf15e3e29c 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -189,7 +189,7 @@ class MobiMLizer(object):
                 para = wrapper
                 emleft = int(round(left / self.profile.fbase)) - ems
                 emleft = min((emleft, 10))
-                while emleft > 0:
+                while emleft > ems/2.0:
                     para = etree.SubElement(para, XHTML('blockquote'))
                     emleft -= ems
             else:
diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py
index 8ba73c0bd1..0372642750 100644
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@@ -985,7 +985,8 @@ def command_restore_database(args, dbpath):
         return 1
 
     if not opts.really_do_it:
-        print _('You must provide the --really-do-it option to do a recovery\n')
+        prints(_('You must provide the --really-do-it option to do a'
+            ' recovery'), end='\n\n')
         parser.print_help()
         return 1
 