diff --git a/recipes/icons/prostamerika.png b/recipes/icons/prostamerika.png
new file mode 100644
index 0000000000..f88c846bed
Binary files /dev/null and b/recipes/icons/prostamerika.png differ
diff --git a/recipes/icons/sb_nation.png b/recipes/icons/sb_nation.png
new file mode 100644
index 0000000000..9d82e492de
Binary files /dev/null and b/recipes/icons/sb_nation.png differ
diff --git a/recipes/icons/wvhooligan.png b/recipes/icons/wvhooligan.png
new file mode 100644
index 0000000000..81a59118c9
Binary files /dev/null and b/recipes/icons/wvhooligan.png differ
diff --git a/recipes/prostamerika.recipe b/recipes/prostamerika.recipe
new file mode 100644
index 0000000000..b216ee469d
--- /dev/null
+++ b/recipes/prostamerika.recipe
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+__license__ = 'GPL 3'
+__copyright__ = 'zotzo'
+
+"""
+http://www.prostamerika.com/
+"""
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class ProstAmerika(BasicNewsRecipe):
+    '''
+    Calibre news recipe for Prost Amerika (http://www.prostamerika.com/),
+    built from the category feeds listed in ``feeds``.
+    '''
+    title = 'Prost Amerika'
+    language = 'en'
+    __author__ = 'rylsfan'
+    #authors =
+    description = 'Seattle soccer with a European accent. News, features, and match reports.'
+    publisher = 'ProstAmerika' # 4464 fremont avenue n, # 209, Seattle, 98103, United States
+    category = 'Sports'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    cover_url = 'http://img17.imageshack.us/img17/9498/prostamerika.jpg'
+    masthead_url = 'http://www.prostamerika.com/soundersfc/wp-content/uploads/2011/02/PASoccer_taglinewhole.jpg'
+
+    encoding = 'utf-8'
+
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+
+    feeds = [
+        (u'Cascadia', u'http://www.prostamerika.com/category/localfootball/feed/'),
+        (u'MLS', u'http://www.prostamerika.com/category/mls/feed/'),
+        (u'EPL', u'http://www.prostamerika.com/category/epl/feed/'),
+        (u'World', u'http://www.prostamerika.com/category/international-soccer/feed/'),
+        (u'Fan Culture', u'http://www.prostamerika.com/category/fan-culture/feed/')
+
+    ]
+
+    keep_only_tags = [dict(name='div', attrs={'id':'maincontent'})]
+    remove_tags = [
+        {'class':'tweetmeme_button'},
+        {'class':'wp-caption-text'}
+    ]
+
+
+    remove_tags_after = [
+        {'class':'tweetmeme_button'}
+    ]
+
+    extra_css = '''
+        h1{font-family:Didot,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
diff --git a/recipes/sb_nation.recipe b/recipes/sb_nation.recipe
new file mode 100644
index 0000000000..19b828cdb7
--- /dev/null
+++ b/recipes/sb_nation.recipe
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = 'Zotzo'
+'''
+http://www.stumptownfooty.com/
+http://www.eightysixforever.com
+http://www.sounderatheart.com
+http://www.dailysoccerfix.com/
+
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SBNation(BasicNewsRecipe):
+    '''
+    Calibre news recipe for a few SB Nation soccer blogs, built from the
+    feeds listed in ``feeds``.
+    '''
+    title = u'SBNation'
+    __author__ = 'rylsfan'
+    description = u"More than 290 individual communities, each offering high quality year-round coverage and conversation led by fans who are passionate."
+    oldest_article = 3
+    language = 'en'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+
+    #cover_url = 'http://img132.imageshack.us/img132/4913/2hyggjegqqdywzn9.png'
+
+    keep_only_tags = [
+        dict(name='h2', attrs={'class':'title'})
+        ,dict(name='div', attrs={'class':'entry-body'})
+    ]
+
+    remove_tags_after = dict(name='div', attrs={'class':'footline entry-actions'})
+    remove_tags = [
+        dict(name='div', attrs={'class':'footline entry-actions'}),
+        {'class': 'extend-divide'}
+    ]
+    # SBNation has 300 special blogs to choose from. These are just a couple!
+    feeds = [
+        (u'Daily Fix', u'http://www.dailysoccerfix.com/rss/'),
+        (u"Stumptown Footy", u'http://www.stumptownfooty.com/rss/'),
+        (u'Sounders', u'http://www.sounderatheart.com/rss/'),
+        (u'Whitecaps', u'http://www.eightysixforever.com/rss/'),
+    ]
+
+    extra_css = """
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        p{font-family:Helvetica,sans-serif; display: block; text-align: left; text-decoration: none; text-indent: 0%;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    """
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
+
+    def populate_article_metadata(self, article, soup, first):
+        '''
+        Replace the first h2 element with its prettified markup followed
+        by a "By author" line.
+
+        Leaves the soup untouched when there is no h2 element or the
+        article has no author.
+        '''
+        h2 = soup.find('h2')
+        if h2 is None or not article.author:
+            # soup.find() found nothing, or there is no name to print.
+            return
+        h2.replaceWith(h2.prettify() + '\nBy ' + article.author + '\n')
diff --git a/recipes/wvhooligan.recipe b/recipes/wvhooligan.recipe
new file mode 100644
index 0000000000..680ac6f244
--- /dev/null
+++ b/recipes/wvhooligan.recipe
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+__license__ = 'GPL 3'
+__copyright__ = 'zotzo'
+__docformat__ = 'restructuredtext en'
+'''
+http://wvhooligan.com/
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+#import re
+
+class wvHooligan(BasicNewsRecipe):
+    '''
+    Calibre news recipe for the WV Hooligan blog (http://wvhooligan.com/),
+    built from the feeds listed in ``feeds``.
+    '''
+    authors = u'Drew Epperley'
+    __author__ = 'rylsfan'
+    language = 'en'
+    version = 2
+
+    title = u'WV Hooligan'
+    publisher = u'Drew Epperley'
+    publication_type = 'Blog'
+    category = u'Soccer'
+    description = u'A look at Major League Soccer (MLS) through the eyes of a MLS writer and fan.'
+
+    cover_url = 'http://wvhooligan.com/wp-content/themes/urbanelements/images/logo3.png'
+
+    oldest_article = 15
+    max_articles_per_feed = 150
+    use_embedded_content = True
+    no_stylesheets = True
+    remove_javascript = True
+    encoding = 'utf8'
+
+    conversion_options = {
+          'comment' : description
+        , 'tags' : category
+        , 'publisher' : publisher
+        , 'language' : language
+    }
+
+    remove_tags = [
+        {'class': 'feedflare'},
+        {'class': 'tweetmeme_button'},
+    ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
+
+    feeds = [
+        (u'Stories', u'http://feeds2.feedburner.com/wvhooligan'),
+        (u'MLS', u'http://wvhooligan.com/category/mls/feed/'),
+        (u'MLS Power Rankings', u'http://wvhooligan.com/category/power-rankings/feed/'),
+        (u'MLS Expansion', u'http://wvhooligan.com/category/mls/expansion-talk/feed/'),
+        (u'US National Team', u'http://wvhooligan.com/category/us-national-team/feed/'),
+        (u'College', u'http://wvhooligan.com/category/college-soccer/feed/'),
+    ]
+
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 5bc0c5b256..ed7d8f2203 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -42,6 +42,7 @@ def is_worker_alive(workers):
     return False
 
 def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+    start_time = time.time()
     plugins = list(metadata_plugins['identify'])
 
     kwargs = {
@@ -105,3 +106,27 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
     log(plog)
     log('\n'+'*'*80)
 
+    for i, result in enumerate(results):
+        result.relevance_in_source = i
+        result.has_cached_cover_url = \
+            plugin.get_cached_cover_url(result.identifiers) is not None
+        result.identify_plugin = plugin
+
+    log('The identify phase took %.2f seconds'%(time.time() - start_time))
+    log('Merging results from different sources and finding earliest',
+            'publication dates')
+    start_time = time.time()
+    merged_results = merge_identify_results(results, log)
+    log('We have %d merged results, merging took: %.2f seconds' %
+            (len(merged_results), time.time() - start_time))
+
+def merge_identify_results(result_map, log):
+    '''
+    Merge the results of the different identify sources into one list.
+
+    Merging is not implemented yet, so no merged results are produced. An
+    empty list is returned rather than None because identify() calls len()
+    on the return value.
+    '''
+    # TODO: implement the actual merge of result_map.
+    return []