diff --git a/resources/images/news/nytimes_sports.png b/resources/images/news/nytimes_sports.png new file mode 100644 index 0000000000..b587be8de0 Binary files /dev/null and b/resources/images/news/nytimes_sports.png differ diff --git a/resources/images/news/nytimes_tech.png b/resources/images/news/nytimes_tech.png new file mode 100644 index 0000000000..64ff8b5eb2 Binary files /dev/null and b/resources/images/news/nytimes_tech.png differ diff --git a/resources/recipes/epl_talk.recipe b/resources/recipes/epl_talk.recipe index 297dffd89c..432b396339 100644 --- a/resources/recipes/epl_talk.recipe +++ b/resources/recipes/epl_talk.recipe @@ -1,6 +1,6 @@ #!/usr/bin/env python __license__ = 'GPL 3' -__copyright__ = 'zotzot' +__copyright__ = 'zotzo' __docformat__ = 'restructuredtext en' ''' http://www.epltalk.com @@ -9,10 +9,9 @@ from calibre.web.feeds.news import BasicNewsRecipe class EPLTalkRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'The Gaffer' language = 'en' - version = 1 + version = 2 + __author__ = 'rylsfan' title = u'EPL Talk' publisher = u'The Gaffer' @@ -21,17 +20,40 @@ class EPLTalkRecipe(BasicNewsRecipe): description = u'News and Analysis from the English Premier League' cover_url = 'http://bit.ly/hJxZPu' - oldest_article = 45 - max_articles_per_feed = 150 + oldest_article = 3 + max_articles_per_feed = 100 use_embedded_content = True remove_javascript = True encoding = 'utf8' - remove_tags_after = [dict(name='div', attrs={'class':'pd-rating'})] + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - feeds = [(u'EPL Talk', u'http://feeds.feedburner.com/EPLTalk')] + remove_tags = [ + {'class': 'feedflare'}, + {'class': 'tweetmeme_button'}, + {'class': 'eplrelated'}, + {'p': 'Related posts:
    '}, + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) + + feeds =[ + (u'EPL Talk', u'http://feeds.feedburner.com/EPLTalk'), + (u'MLS Talk', u'http://feeds.feedburner.com/majorleaguesoccertalksite'), + #(), + #(), + #(), + ] extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif;} - img {float: left; margin-right: 0.5em;} - ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' diff --git a/resources/recipes/nytimes_sports.recipe b/resources/recipes/nytimes_sports.recipe new file mode 100644 index 0000000000..f394fc3232 --- /dev/null +++ b/resources/recipes/nytimes_sports.recipe @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# encoding: utf-8 + +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = 'zotzo' +__docformat__ = 'restructuredtext en' +""" +http://fifthdown.blogs.nytimes.com/ +http://offthedribble.blogs.nytimes.com/ +http://thequad.blogs.nytimes.com/ +http://slapshot.blogs.nytimes.com/ +http://goal.blogs.nytimes.com/ +http://bats.blogs.nytimes.com/ +http://straightsets.blogs.nytimes.com/ +http://formulaone.blogs.nytimes.com/ +http://onpar.blogs.nytimes.com/ +""" + +from calibre.web.feeds.news import BasicNewsRecipe + + +class NYTimesSports(BasicNewsRecipe): + title = 'New York Times Sports Beat' + language = 'en' + __author__ = 'rylsfan' + description = 'Indepth sports from the New York Times' + publisher = 'The New York Times' + category = 'Sports' + oldest_article = 3 + max_articles_per_feed = 25 + no_stylesheets = True + language = 'en' + #cover_url ='http://bit.ly/h8F4DO' + feeds = [ + (u'The Fifth Down', u'http://fifthdown.blogs.nytimes.com/feed/'), + (u'Off The Dribble', u'http://offthedribble.blogs.nytimes.com/feed/'), + (u'The Quad', u'http://thequad.blogs.nytimes.com/feed/'), + (u'Slap Shot', u'http://slapshot.blogs.nytimes.com/feed/'), + (u'Goal', u'http://goal.blogs.nytimes.com/feed/'), + (u'Bats', u'http://bats.blogs.nytimes.com/feed/'), + (u'Straight Sets', u'http://straightsets.blogs.nytimes.com/feed/'), + (u'Formula One', u'http://formulaone.blogs.nytimes.com/feed/'), + (u'On Par', u'http://onpar.blogs.nytimes.com/feed/'), + ] + keep_only_tags = [dict(name='div', attrs={'id':'header'}), + dict(name='h1'), + dict(name='h2'), + dict(name='div', attrs={'class':'entry-content'})] + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' diff --git a/resources/recipes/nytimes_tech.recipe b/resources/recipes/nytimes_tech.recipe new file mode 100644 index 0000000000..7db2db9781 --- /dev/null +++ b/resources/recipes/nytimes_tech.recipe @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# encoding: utf-8 + +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = 'zotzo' +__docformat__ = 'restructuredtext en' +""" +http://pogue.blogs.nytimes.com/ +""" + +from calibre.web.feeds.news import BasicNewsRecipe + + +class NYTimesTechnology(BasicNewsRecipe): + title = 'New York Times Technology Beat' + language = 'en' + __author__ = 'David Pogue' + description = 'The latest in technology from David Pogue' + publisher = 'The New York Times' + category = 'Technology' + oldest_article = 14 + max_articles_per_feed = 25 + no_stylesheets = True + language = 'en' + cover_url ='http://bit.ly/g0SKJT' + feeds = [ + (u'Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'), + (u'Bits', u'http://bits.blogs.nytimes.com/feed/'), + (u'Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'), + (u'Open', u'http://open.blogs.nytimes.com/feed/') + ] + keep_only_tags = [dict(name='div', attrs={'id':'header'}), + dict(name='h1'), + dict(name='h2'), + dict(name='div', attrs={'class':'entry-content'})] + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; + font-weight:bold;font-size:large;} + + h2{font-family:Arial,Helvetica,sans-serif; + font-weight:normal;font-size:small;} + + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index e3e9917491..7ff81f8ad1 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -124,7 +124,10 @@ class Metadata(object): def __setattr__(self, field, val, extra=None): _data = object.__getattribute__(self, '_data') if field in TOP_LEVEL_IDENTIFIERS: + field, val = self._clean_identifier(field, val) _data['identifiers'].update({field: val}) + elif field == 'identifiers': + self.set_identifiers(val) elif field in STANDARD_METADATA_FIELDS: if val is None: val = NULL_VALUES.get(field, None) @@ -189,8 +192,35 @@ class Metadata(object): ans = {} return copy.deepcopy(ans) + def _clean_identifier(self, typ, val): + typ = icu_lower(typ).strip().replace(':', '').replace(',', '') + val = val.strip().replace(',', '|').replace(':', '|') + return typ, val + def set_identifiers(self, identifiers): - object.__getattribute__(self, '_data')['identifiers'] = identifiers + ''' + Set all identifiers. Note that if you previously set ISBN, calling + this method will delete it. + ''' + cleaned = {} + for key, val in identifiers.iteritems(): + key, val = self._clean_identifier(key, val) + if key and val: + cleaned[key] = val + object.__getattribute__(self, '_data')['identifiers'] = cleaned + + def set_identifier(self, typ, val): + 'If val is empty, deletes identifier of type typ' + typ, val = self._clean_identifier(typ, val) + if not typ: + return + identifiers = object.__getattribute__(self, + '_data')['identifiers'] + + if not val and typ in identifiers: + identifiers.pop(typ) + if val: + identifiers[typ] = val # field-oriented interface. Intended to be the same as in LibraryDatabase