Merge from trunk

2025-08-30 23:00:21 -04:00 · 2011-03-01 15:37:00 +00:00 · 2011-03-01 15:37:00 +00:00 · 06562f5fa3
commit 06562f5fa3
parent 52e2c7b4e4 2a3f412b48
6 changed files with 165 additions and 12 deletions
--- a/resources/images/news/nytimes_sports.png
+++ b/resources/images/news/nytimes_sports.png
--- a/resources/images/news/nytimes_tech.png
+++ b/resources/images/news/nytimes_tech.png
--- a/resources/recipes/epl_talk.recipe
+++ b/resources/recipes/epl_talk.recipe
@ -1,6 +1,6 @@
 #!/usr/bin/env  python
 __license__ = 'GPL 3'
-__copyright__ = 'zotzot'
+__copyright__ = 'zotzo'
 __docformat__ = 'restructuredtext en'
 '''
 http://www.epltalk.com
@ -9,10 +9,9 @@ from calibre.web.feeds.news import BasicNewsRecipe


 class EPLTalkRecipe(BasicNewsRecipe):
-    __license__ = 'GPL v3'
-    __author__ = u'The Gaffer'
    language = 'en'
-    version = 1
+    version = 2
+    __author__ = 'rylsfan'

    title = u'EPL Talk'
    publisher = u'The Gaffer'
@ -21,17 +20,40 @@ class EPLTalkRecipe(BasicNewsRecipe):
    description = u'News and Analysis from the English Premier League'
    cover_url = 'http://bit.ly/hJxZPu'

-    oldest_article = 45
-    max_articles_per_feed = 150
+    oldest_article = 3
+    max_articles_per_feed = 100
    use_embedded_content = True
    remove_javascript = True
    encoding = 'utf8'

-    remove_tags_after = [dict(name='div', attrs={'class':'pd-rating'})]
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }

-    feeds = [(u'EPL Talk', u'http://feeds.feedburner.com/EPLTalk')]
+    remove_tags =  [
+                       {'class': 'feedflare'},
+                       {'class': 'tweetmeme_button'},
+                       {'class': 'eplrelated'},
+                       {'p': 'Related posts:<ol>'},
+                   ]
+
+    def preprocess_html(self, soup):
+        '''
+        Pass the parsed article ``soup`` to ``self.adeify_images`` and
+        return whatever that call returns.
+        '''
+        processed = self.adeify_images(soup)
+        return processed
+
+    feeds =[
+               (u'EPL Talk', u'http://feeds.feedburner.com/EPLTalk'),
+               (u'MLS Talk', u'http://feeds.feedburner.com/majorleaguesoccertalksite'),
+               #(),
+               #(),
+               #(),
+           ]

    extra_css = '''
-                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
-                img {float: left; margin-right: 0.5em;}
-                '''
+           h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+           h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+           p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+           body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+               '''
--- a/resources/recipes/nytimes_sports.recipe
+++ b/resources/recipes/nytimes_sports.recipe
@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = 'zotzo'
+__docformat__ = 'restructuredtext en'
+"""
+http://fifthdown.blogs.nytimes.com/
+http://offthedribble.blogs.nytimes.com/
+http://thequad.blogs.nytimes.com/
+http://slapshot.blogs.nytimes.com/
+http://goal.blogs.nytimes.com/
+http://bats.blogs.nytimes.com/
+http://straightsets.blogs.nytimes.com/
+http://formulaone.blogs.nytimes.com/
+http://onpar.blogs.nytimes.com/
+"""
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NYTimesSports(BasicNewsRecipe):
+    """
+    Calibre news recipe for the sports blogs hosted on blogs.nytimes.com.
+
+    The class is purely declarative: it only assigns attributes on a
+    ``BasicNewsRecipe`` subclass and defines no methods of its own.
+    """
+
+    # Descriptive metadata for the recipe.
+    title = 'New York Times Sports Beat'
+    __author__ = 'rylsfan'
+    description = 'In-depth sports from the New York Times'
+    publisher = 'The New York Times'
+    category = 'Sports'
+    language = 'en'
+
+    # Download limits and stylesheet handling.
+    oldest_article = 3
+    max_articles_per_feed = 25
+    no_stylesheets = True
+    #cover_url ='http://bit.ly/h8F4DO'
+
+    # One (section title, feed URL) pair per blog.
+    feeds = [
+        (u'The Fifth Down', u'http://fifthdown.blogs.nytimes.com/feed/'),
+        (u'Off The Dribble', u'http://offthedribble.blogs.nytimes.com/feed/'),
+        (u'The Quad', u'http://thequad.blogs.nytimes.com/feed/'),
+        (u'Slap Shot', u'http://slapshot.blogs.nytimes.com/feed/'),
+        (u'Goal', u'http://goal.blogs.nytimes.com/feed/'),
+        (u'Bats', u'http://bats.blogs.nytimes.com/feed/'),
+        (u'Straight Sets', u'http://straightsets.blogs.nytimes.com/feed/'),
+        (u'Formula One', u'http://formulaone.blogs.nytimes.com/feed/'),
+        (u'On Par', u'http://onpar.blogs.nytimes.com/feed/'),
+    ]
+
+    # Tag specifications: the header div, h1/h2 headings and the
+    # entry-content div.
+    keep_only_tags = [
+        dict(name='div', attrs={'id': 'header'}),
+        dict(name='h1'),
+        dict(name='h2'),
+        dict(name='div', attrs={'class': 'entry-content'}),
+    ]
+
+    # Extra CSS: Arial/Helvetica fonts, small text, large bold h1.
+    extra_css = '''
+           h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+           h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+           p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+           body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+                   '''
--- a/resources/recipes/nytimes_tech.recipe
+++ b/resources/recipes/nytimes_tech.recipe
@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = 'zotzo'
+__docformat__ = 'restructuredtext en'
+"""
+http://pogue.blogs.nytimes.com/
+"""
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NYTimesTechnology(BasicNewsRecipe):
+    """
+    Calibre news recipe for technology blogs hosted on blogs.nytimes.com
+    (Pogue's Posts, Bits, Gadgetwise and Open).
+
+    The class is purely declarative: it only assigns attributes on a
+    ``BasicNewsRecipe`` subclass and defines no methods of its own.
+    """
+
+    # Descriptive metadata for the recipe.
+    title = 'New York Times Technology Beat'
+    # NOTE(review): this names the columnist mentioned in the description;
+    # confirm whether the recipe's own author was intended instead.
+    __author__ = 'David Pogue'
+    description = 'The latest in technology from David Pogue'
+    publisher = 'The New York Times'
+    category = 'Technology'
+    language = 'en'
+
+    # Download limits, stylesheet handling and cover image.
+    oldest_article = 14
+    max_articles_per_feed = 25
+    no_stylesheets = True
+    cover_url = 'http://bit.ly/g0SKJT'
+
+    # One (section title, feed URL) pair per blog.
+    feeds = [
+        (u'Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
+        (u'Bits', u'http://bits.blogs.nytimes.com/feed/'),
+        (u'Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
+        (u'Open', u'http://open.blogs.nytimes.com/feed/'),
+    ]
+
+    # Tag specifications: the header div, h1/h2 headings and the
+    # entry-content div.
+    keep_only_tags = [
+        dict(name='div', attrs={'id': 'header'}),
+        dict(name='h1'),
+        dict(name='h2'),
+        dict(name='div', attrs={'class': 'entry-content'}),
+    ]
+
+    # Extra CSS: Arial/Helvetica fonts, small text, large bold h1.
+    extra_css = '''
+           h1{font-family:Arial,Helvetica,sans-serif;
+               font-weight:bold;font-size:large;}
+
+           h2{font-family:Arial,Helvetica,sans-serif;
+               font-weight:normal;font-size:small;}
+
+           p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+           body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+               '''
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -124,7 +124,10 @@ class Metadata(object):
    def __setattr__(self, field, val, extra=None):
        _data = object.__getattribute__(self, '_data')
        if field in TOP_LEVEL_IDENTIFIERS:
+            field, val = self._clean_identifier(field, val)
            _data['identifiers'].update({field: val})
+        elif field == 'identifiers':
+            self.set_identifiers(val)
        elif field in STANDARD_METADATA_FIELDS:
            if val is None:
                val = NULL_VALUES.get(field, None)
@ -189,8 +192,35 @@ class Metadata(object):
            ans = {}
        return copy.deepcopy(ans)

+    def _clean_identifier(self, typ, val):
+        '''
+        Normalize an identifier type/value pair before it is stored.
+
+        The type is lower-cased with ``icu_lower``, stripped, and has every
+        ``:`` and ``,`` removed. The value is stripped and has every ``,``
+        and ``:`` replaced with ``|``.
+
+        A falsy type or value (``None`` or an empty string) is returned
+        unchanged, without being passed to ``icu_lower`` or having string
+        methods called on it, so callers can test the results for emptiness.
+
+        Returns the ``(typ, val)`` tuple.
+        '''
+        if typ:
+            typ = icu_lower(typ).strip().replace(':', '').replace(',', '')
+        if val:
+            val = val.strip().replace(',', '|').replace(':', '|')
+        return typ, val
+
    def set_identifiers(self, identifiers):
-        object.__getattribute__(self, '_data')['identifiers'] = identifiers
+        '''
+        Set all identifiers. Note that if you previously set ISBN, calling
+        this method will delete it.
+
+        Every key/value pair of ``identifiers`` is passed through
+        ``_clean_identifier``; pairs whose cleaned key or value is empty are
+        dropped. The stored identifiers dict is replaced, not updated. A
+        falsy ``identifiers`` (for example ``None``) is treated as an empty
+        mapping and therefore clears all identifiers.
+        '''
+        cleaned = {}
+        # ``or {}`` keeps a None argument from failing on .iteritems()
+        for key, val in (identifiers or {}).iteritems():
+            key, val = self._clean_identifier(key, val)
+            if key and val:
+                cleaned[key] = val
+        object.__getattribute__(self, '_data')['identifiers'] = cleaned
+
+    def set_identifier(self, typ, val):
+        '''
+        Store ``val`` as the identifier of type ``typ``.
+
+        Both arguments are first passed through ``_clean_identifier``.
+        Nothing happens when the cleaned type is empty. An empty cleaned
+        value removes any existing identifier of that type; otherwise the
+        cleaned value is stored under the cleaned type.
+        '''
+        typ, val = self._clean_identifier(typ, val)
+        if not typ:
+            return
+        current = object.__getattribute__(self, '_data')['identifiers']
+        if val:
+            current[typ] = val
+        else:
+            # Removing a type that is not present is a no-op.
+            current.pop(typ, None)

    # field-oriented interface. Intended to be the same as in LibraryDatabase