Merge from trunk

commit e0e2a0bf40
Charles Haley, 2012-10-08 20:11:18 +02:00
14 changed files with 209 additions and 75 deletions

View File

@@ -15,6 +15,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True
+    ignore_duplicate_articles = {'title'}
     preprocess_regexps = [
         (re.compile(r'<!-- Begin tmpl module_competition_offer -->.*?<!-- End tmpl module_competition_offer-->', re.IGNORECASE | re.DOTALL), lambda match: '')]

View File

@@ -1,11 +1,13 @@
 from calibre import browser
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     title = u'Countryfile.com'
     #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
     __author__ = 'Dave Asbury'
     description = 'The official website of Countryfile Magazine'
-    # last updated 9/9//12
+    # last updated 7/10/12
     language = 'en_GB'
     oldest_article = 30
     max_articles_per_feed = 25
@@ -13,12 +15,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     no_stylesheets = True
     auto_cleanup = True
     #articles_are_obfuscated = True
+    ignore_duplicate_articles = {'title'}
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.countryfile.com/')
-        cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'})
+        cov = soup.find(attrs={'width' : '160', 'class' : re.compile('imagecache imagecache-160px_wide')})
         print '******** ',cov,' ***'
         cov2 = str(cov)
-        cov2=cov2[140:223]
+        cov2=cov2[10:101]
         print '******** ',cov2,' ***'
         #cov2='http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/1b_0.jpg'
         # try to get cover - if can't get known cover
@@ -40,3 +44,6 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
         (u'Country News', u'http://www.countryfile.com/rss/news'),
         (u'Countryside', u'http://www.countryfile.com/rss/countryside'),
     ]
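Note that the rewritten get_cover_url still slices str(cov)[10:101] to cut the URL out of the tag text, which breaks as soon as the attribute order or class list on the page shifts. A minimal sketch of a sturdier lookup, assuming the same calibre recipe environment (index_to_soup returning a BeautifulSoup tree, the recipe's own re import) and reusing the fallback cover URL from the recipe's commented-out line:

    # Read the src attribute directly instead of slicing str(cov).
    def get_cover_url(self):
        soup = self.index_to_soup('http://www.countryfile.com/')
        cov = soup.find('img', attrs={'width' : '160',
                'class' : re.compile('imagecache imagecache-160px_wide')})
        if cov is not None and cov.get('src'):
            return cov['src']
        # known cover, taken from the commented-out line in the recipe
        return 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/1b_0.jpg'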

View File

@@ -72,7 +72,7 @@ class DerSpiegel(BasicNewsRecipe):
             for article in section.findNextSiblings(['dd','dt']):
                 if article.name == 'dt':
                     break
-                link = article.find('a')
+                link = article.find('a', href=True)
                 title = self.tag_to_string(link).strip()
                 if title in self.empty_articles:
                     continue
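The href=True filter guards against anchors that carry no href attribute (pure name targets), which the recipe would otherwise treat as article links. A small sketch of the difference, assuming calibre's bundled BeautifulSoup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<dd><a name="top"></a><a href="/article">Story</a></dd>')
    link = soup.find('a', href=True)  # skips the name-only anchor
    print link['href']  # prints /article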

View File

@@ -1,5 +1,6 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     title = u'FHM UK'
     description = 'Good News for Men.'
@@ -7,14 +8,15 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
     masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
     __author__ = 'Dave Asbury'
-    # last updated 1/7/12
+    # last updated 7/10/12
     language = 'en_GB'
-    oldest_article = 28
-    max_articles_per_feed = 8
+    oldest_article = 31
+    max_articles_per_feed = 15
     remove_empty_feeds = True
     no_stylesheets = True
     #auto_cleanup = True
     # articles_are_obfuscated = True
     keep_only_tags = [
         dict(name='h1'),
         dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}),
@@ -28,15 +30,13 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     ]
     feeds = [
-        (u'Homepage 1',u'http://feed43.com/6655867614547036.xml'),
-        (u'Homepage 2',u'http://feed43.com/4167731873103110.xml'),
-        (u'Homepage 3',u'http://feed43.com/7667138788771570.xml'),
-        (u'Homepage 4',u'http://feed43.com/6550421522527341.xml'),
-        (u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
-        (u'Gaming',u'http://feed43.com/6537162612465672.xml'),
-        (u'Girls',u'http://feed43.com/4574262733341068.xml'),# edit link http://feed43.com/feed.html?name=4574262733341068
+        # repeatable search = </div>{|}<a href="{%}" class="{*}">{%}</a>{|}<p>{*}</p>
+        (u'Homepage',u'http://rss.feedsportal.com/c/375/f/434908/index.rss'),
+        (u'Funny',u'http://rss.feedsportal.com/c/375/f/434910/index.rss'),
+        (u'Girls',u'http://rss.feedsportal.com/c/375/f/434913/index.rss'),
     ]
     extra_css = '''
         h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
         h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}

View File

@@ -4,7 +4,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     title = u'New Musical Express Magazine'
     description = 'Author D.Asbury. UK Rock & Pop Mag. '
     __author__ = 'Dave Asbury'
-    # last updated 9/6/12
+    # last updated 7/10/12
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
@@ -14,15 +14,13 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     language = 'en_GB'
     def get_cover_url(self):
-        soup = self.index_to_soup('http://www.magazinesdirect.com/categories/mens/tv-and-music/')
-        cov = soup.find(attrs={'title' : 'NME magazine subscriptions'})
-        cov2 = 'http://www.magazinesdirect.com'+cov['src']
-        print '***cov = ',cov2,' ***'
-        cover_url = str(cov2)
+        soup = self.index_to_soup('http://www.nme.com/component/subscribe')
+        cov = soup.find(attrs={'id' : 'magazine_cover'})
+        cov2 = str(cov['src'])
         # print '**** Cov url =*', cover_url,'***'
         #print '**** Cov url =*','http://www.magazinesdirect.com/article_images/articledir_3138/1569221/1_largelisting.jpg','***'
         br = browser()
         br.set_handle_redirect(False)
         try:
@@ -31,8 +29,8 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
         except:
             cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
         return cover_url
-    masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
+    masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
     remove_tags = [
         dict( attrs={'class':'clear_icons'}),
@@ -61,9 +59,15 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     feeds = [
-        (u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'),
+        (u'NME News', u'http://feeds.feedburner.com/nmecom/rss/newsxml?format=xml'),
         #(u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'),
-        (u'Reviews',u'http://feed43.com/4138608576351646.xml'),
+        (u'Reviews',u'http://feed43.com/1817687144061333.xml'),
         (u'Bloggs',u'http://feed43.com/3326754333186048.xml'),
     ]
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''

recipes/pvp_online.recipe (new file)
View File

@@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class AdvancedUserRecipe1344926684(BasicNewsRecipe):
+    title = u'PVP online'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    #auto_cleanup = True
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'en'
+    remove_javascript = True
+    keep_only_tags = [dict(name='div', attrs={'class':'body'})]
+    remove_tags = [dict(name='div', attrs={'class':'prevBg'}),dict(name='div', attrs={'class':'nextBg'}),dict(name='div', attrs={'class':'postMeta'})]
+    feeds = [(u'Comics', u'http://pvponline.com/feed'), ]

View File

@@ -5,13 +5,15 @@ class AdvancedUserRecipe1324663493(BasicNewsRecipe):
     title = u'Shortlist'
     description = 'Articles From Shortlist.com'
     # I've set oldest article to 7 days as the website updates weekly
-    oldest_article = 7
-    max_articles_per_feed = 12
+    oldest_article = 8
+    max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
+    ignore_duplicate_articles = {'title'}
     __author__ = 'Dave Asbury'
-    # last updated 19/5/12
+    # last updated 7/10/12
     language = 'en_GB'
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.shortlist.com')
@@ -45,17 +47,16 @@ class AdvancedUserRecipe1324663493(BasicNewsRecipe):
     ]
     feeds = [
-        (u'Home carousel',u'http://feed43.com/7106317222455380.xml'),
-        (u'This Weeks Issue', u'http://feed43.com/0323588208751786.xml'),
-        (u'Cool Stuff',u'http://feed43.com/6253845228768456.xml'),
-        (u'Style',u'http://feed43.com/7217107577215678.xml'),
-        (u'Films',u'http://feed43.com/3101308515277265.xml'),
-        (u'Music',u'http://feed43.com/2416400550560162.xml'),
-        (u'TV',u'http://feed43.com/4781172470717123.xml'),
-        (u'Sport',u'http://feed43.com/5303151885853308.xml'),
-        (u'Gaming',u'http://feed43.com/8883764600355347.xml'),
-        (u'Women',u'http://feed43.com/2648221746514241.xml'),
-        (u'Instant Improver', u'http://feed43.com/1236541026275417.xml'),
-        #(u'Articles', u'http://feed43.com/3428534448355545.xml')
+        #edit http://feed43.com/feed.html?name=3156308700147005
+        # repeatable pattern = <h3>{_}<a href="{%}">{%}</a>{*}</h3>
+        (u'This Weeks Issue', u'http://feed43.com/5205766657404804.xml'),
+        (u'Home Page',u'http://feed43.com/3156308700147005.xml'),
+        (u'Cool Stuff',u'http://feed43.com/1557051772026706.xml'),
+        (u'Style',u'http://feed43.com/4168836374571502.xml'),
+        (u'Entertainment',u'http://feed43.com/4578504030588024.xml'),
     ]

View File

@@ -40,6 +40,7 @@ class ANDROID(USBMS):
             0xca4 : HTC_BCDS,
             0xca9 : HTC_BCDS,
             0xcac : HTC_BCDS,
+            0xcba : HTC_BCDS,
             0xccf : HTC_BCDS,
             0xcd6 : HTC_BCDS,
             0xce5 : HTC_BCDS,

View File

@@ -12,19 +12,17 @@ Originally developed by Timothy Legge <timlegge@gmail.com>.
 Extended to support Touch firmware 2.0.0 and later and newer devices by David Forrester <davidfor@internode.on.net>
 '''
-import os, time, calendar
+import os, time
 from contextlib import closing
 from calibre.devices.usbms.books import BookList
 from calibre.devices.usbms.books import CollectionsBookList
 from calibre.devices.kobo.books import KTCollectionsBookList
 from calibre.devices.kobo.books import Book
 from calibre.devices.kobo.books import ImageWrapper
-from calibre.devices.kobo.bookmark import Bookmark
 from calibre.devices.mime import mime_type_ext
 from calibre.devices.usbms.driver import USBMS, debug_print
 from calibre import prints
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.constants import DEBUG
 from calibre.utils.config import prefs
@@ -994,6 +992,7 @@ class KOBO(USBMS):
         return USBMS.create_annotations_path(self, mdata)
     def get_annotations(self, path_map):
+        from calibre.devices.kobo.bookmark import Bookmark
         EPUB_FORMATS = [u'epub']
         epub_formats = set(EPUB_FORMATS)
@@ -1056,6 +1055,7 @@ class KOBO(USBMS):
         return bookmarked_books
     def generate_annotation_html(self, bookmark):
+        import calendar
         from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
         # Returns <div class="user_annotations"> ... </div>
         #last_read_location = bookmark.last_read_location
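All three hunks apply the same change: imports that only the annotation code paths need (Bookmark, calendar) move from module scope into the methods that use them, so loading the Kobo driver at calibre startup no longer pays for them. A generic, self-contained sketch of the deferred-import pattern:

    # The import cost is paid on first call rather than at module load.
    def format_epoch(epoch):
        import time  # deferred import; trivial here, but the idea scales
        return time.strftime('%Y-%m-%d', time.gmtime(epoch))

    print format_epoch(0)  # 1970-01-01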

View File

@@ -6,15 +6,19 @@ __docformat__ = 'restructuredtext en'
 '''
 Convert an ODT file into a Open Ebook
 '''
-import os
+import os, logging
 from lxml import etree
+from cssutils import CSSParser
+from cssutils.css import CSSRule
 from odf.odf2xhtml import ODF2XHTML
 from odf.opendocument import load as odLoad
 from odf.draw import Frame as odFrame, Image as odImage
 from odf.namespaces import TEXTNS as odTEXTNS
 from calibre import CurrentDir, walk
+from calibre.ebooks.oeb.base import _css_logger
 class Extract(ODF2XHTML):
@@ -29,14 +33,14 @@ class Extract(ODF2XHTML):
     def fix_markup(self, html, log):
         root = etree.fromstring(html)
-        self.epubify_markup(root, log)
         self.filter_css(root, log)
-        self.extract_css(root)
+        self.extract_css(root, log)
+        self.epubify_markup(root, log)
         html = etree.tostring(root, encoding='utf-8',
             xml_declaration=True)
         return html
-    def extract_css(self, root):
+    def extract_css(self, root, log):
         ans = []
         for s in root.xpath('//*[local-name() = "style" and @type="text/css"]'):
             ans.append(s.text)
@@ -51,9 +55,21 @@ class Extract(ODF2XHTML):
         etree.SubElement(head, ns+'link', {'type':'text/css',
             'rel':'stylesheet', 'href':'odfpy.css'})
-        with open('odfpy.css', 'wb') as f:
-            f.write((u'\n\n'.join(ans)).encode('utf-8'))
+        css = u'\n\n'.join(ans)
+        parser = CSSParser(loglevel=logging.WARNING,
+                log=_css_logger)
+        self.css = parser.parseString(css, validate=False)
+        with open('odfpy.css', 'wb') as f:
+            f.write(css.encode('utf-8'))
+    def get_css_for_class(self, cls):
+        if not cls: return None
+        for rule in self.css.cssRules.rulesOfType(CSSRule.STYLE_RULE):
+            for sel in rule.selectorList:
+                q = sel.selectorText
+                if q == '.' + cls:
+                    return rule
     def epubify_markup(self, root, log):
         from calibre.ebooks.oeb.base import XPath, XHTML
@@ -84,16 +100,54 @@ class Extract(ODF2XHTML):
                 div.attrib['style'] = style
                 img.attrib['style'] = 'max-width: 100%; max-height: 100%'
-        # A div/div/img construct causes text-align:center to not work in ADE
-        # so set the display of the second div to inline. This should have no
-        # effect (apart from minor vspace issues) in a compliant HTML renderer
-        # but it fixes the centering of the image via a text-align:center on
-        # the first div in ADE
+        # Handle anchored images. The default markup + CSS produced by
+        # odf2xhtml works with WebKit but not with ADE. So we convert the
+        # common cases of left/right/center aligned block images to work on
+        # both webkit and ADE. We detect the case of setting the side margins
+        # to auto and map it to an appropriate text-align directive, which
+        # works in both WebKit and ADE.
+        # https://bugs.launchpad.net/bugs/1063207
+        # https://bugs.launchpad.net/calibre/+bug/859343
         imgpath = XPath('descendant::h:div/h:div/h:img')
         for img in imgpath(root):
             div2 = img.getparent()
             div1 = div2.getparent()
-            if len(div1) == len(div2) == 1:
+            if (len(div1), len(div2)) != (1, 1): continue
+            cls = div1.get('class', '')
+            first_rules = filter(None, [self.get_css_for_class(x) for x in
+                cls.split()])
+            has_align = False
+            for r in first_rules:
+                if r.style.getProperty(u'text-align') is not None:
+                    has_align = True
+            ml = mr = None
+            if not has_align:
+                aval = None
+                cls = div2.get(u'class', u'')
+                rules = filter(None, [self.get_css_for_class(x) for x in
+                    cls.split()])
+                for r in rules:
+                    ml = r.style.getPropertyCSSValue(u'margin-left') or ml
+                    mr = r.style.getPropertyCSSValue(u'margin-right') or mr
+                ml = getattr(ml, 'value', None)
+                mr = getattr(mr, 'value', None)
+                if ml == mr == u'auto':
+                    aval = u'center'
+                elif ml == u'auto' and mr != u'auto':
+                    aval = 'right'
+                elif ml != u'auto' and mr == u'auto':
+                    aval = 'left'
+                if aval is not None:
+                    style = div1.attrib.get('style', '').strip()
+                    if style and not style.endswith(';'):
+                        style = style + ';'
+                    style += 'text-align:%s'%aval
+                    has_align = True
+                    div1.attrib['style'] = style
+            if has_align:
+                # This is needed for ADE, without it the text-align has no
                # effect
                 style = div2.attrib['style']
                 div2.attrib['style'] = 'display:inline;'+style
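The new logic parses the stylesheet that odf2xhtml emits and, for each anchored image, maps margin-based block alignment onto a text-align directive that ADE honours: both side margins auto means centered, and a single auto margin pushes the block to the opposite side. A standalone sketch of just that detection step, assuming cssutils is installed (the class name .fM1 is made up):

    import logging
    from cssutils import CSSParser
    from cssutils.css import CSSRule

    css = u'.fM1 { margin-left: auto; margin-right: auto; }'
    sheet = CSSParser(loglevel=logging.WARNING).parseString(css, validate=False)
    for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
        ml = rule.style.getPropertyCSSValue(u'margin-left')
        mr = rule.style.getPropertyCSSValue(u'margin-right')
        ml = getattr(ml, 'value', None)
        mr = getattr(mr, 'value', None)
        if ml == mr == u'auto':
            print 'center'   # both margins auto: centered block
        elif ml == u'auto':
            print 'right'    # left margin auto: pushed right
        elif mr == u'auto':
            print 'left'     # right margin auto: pushed left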

View File

@@ -211,23 +211,25 @@ def main(args=sys.argv):
         msg = compose_mail(args[1], args[2], args[3], subject=opts.subject,
                 attachment=opts.attachment)
         from_, to = args[1:3]
-        efrom, eto = map(extract_email_address, (from_, to))
-        eto = [eto]
+        eto = [extract_email_address(x.strip()) for x in to.split(',')]
+        efrom = extract_email_address(from_)
     else:
         msg = sys.stdin.read()
-        from email.parser import Parser
+        from email import message_from_string
         from email.utils import getaddresses
-        eml = Parser.parsestr(msg, headersonly=True)
+        eml = message_from_string(msg)
         tos = eml.get_all('to', [])
-        ccs = eml.get_all('cc', [])
-        eto = getaddresses(tos + ccs)
+        ccs = eml.get_all('cc', []) + eml.get_all('bcc', [])
+        all_tos = []
+        for x in tos + ccs:
+            all_tos.extend(y.strip() for y in x.split(','))
+        eto = list(map(extract_email_address, all_tos))
         if not eto:
             raise ValueError('Email from STDIN does not specify any recipients')
         efrom = getaddresses(eml.get_all('from', []))
         if not efrom:
             raise ValueError('Email from STDIN does not specify a sender')
-        efrom = efrom[0]
+        efrom = efrom[0][1]
     outbox = None
     if opts.outbox is not None:
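The stdin branch now parses the raw message with email.message_from_string, folds Bcc into the recipient set, and splits comma-separated header values before extracting bare addresses. A self-contained sketch of that recipient handling (the addresses are made up, and calibre's extract_email_address helper is left out):

    from email import message_from_string

    raw = 'From: a@example.com\nTo: b@example.com, "C" <c@example.com>\nBcc: d@example.com\n\nbody'
    eml = message_from_string(raw)
    tos = eml.get_all('to', [])
    ccs = eml.get_all('cc', []) + eml.get_all('bcc', [])
    all_tos = []
    for x in tos + ccs:
        all_tos.extend(y.strip() for y in x.split(','))
    print all_tos  # ['b@example.com', '"C" <c@example.com>', 'd@example.com']

Note that the naive comma split would still misparse a display name that itself contains a comma, such as 'Doe, Jane <j@example.com>'.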

View File

@@ -265,6 +265,12 @@ class Feed(object):
         if i > -1:
             self.articles[i:i+1] = []
+    def remove_article(self, article):
+        try:
+            self.articles.remove(article)
+        except ValueError:
+            pass
 class FeedCollection(list):
     def __init__(self, feeds):

View File

@@ -167,9 +167,10 @@ class BasicNewsRecipe(Recipe):
     extra_css = None
     #: If True empty feeds are removed from the output.
-    #: This option has no effect if parse_index is overriden in
+    #: This option has no effect if parse_index is overridden in
     #: the sub class. It is meant only for recipes that return a list
-    #: of feeds using `feeds` or :meth:`get_feeds`.
+    #: of feeds using `feeds` or :meth:`get_feeds`. It is also used if you use
+    #: the ignore_duplicate_articles option.
     remove_empty_feeds = False
     #: List of regular expressions that determines which links to follow
@@ -321,6 +322,15 @@ class BasicNewsRecipe(Recipe):
     #: The string will be used as the disabled message
     recipe_disabled = None
+    #: Ignore duplicates of articles that are present in more than one section.
+    #: A duplicate article is an article that has the same title and/or URL.
+    #: To ignore articles with the same title, set this to:
+    #: ignore_duplicate_articles = {'title'}
+    #: To use URLs instead, set it to:
+    #: ignore_duplicate_articles = {'url'}
+    #: To match on title or URL, set it to:
+    #: ignore_duplicate_articles = {'title', 'url'}
+    ignore_duplicate_articles = None
     # See the built-in profiles for examples of these settings.
@@ -1019,6 +1029,28 @@ class BasicNewsRecipe(Recipe):
         url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
         return self._fetch_article(url, dir, f, a, num_of_feeds)
+    def remove_duplicate_articles(self, feeds):
+        seen_keys = defaultdict(set)
+        remove = []
+        for f in feeds:
+            for article in f:
+                for key in self.ignore_duplicate_articles:
+                    val = getattr(article, key)
+                    seen = seen_keys[key]
+                    if val:
+                        if val in seen:
+                            remove.append((f, article))
+                        else:
+                            seen.add(val)
+        for feed, article in remove:
+            self.log.debug('Removing duplicate article: %s from section: %s'%(
+                article.title, feed.title))
+            feed.remove_article(article)
+        if self.remove_empty_feeds:
+            feeds = [f for f in feeds if len(f) > 0]
+        return feeds
     def build_index(self):
         self.report_progress(0, _('Fetching feeds...'))
@@ -1033,6 +1065,9 @@ class BasicNewsRecipe(Recipe):
         if not feeds:
             raise ValueError('No articles found, aborting')
+        if self.ignore_duplicate_articles is not None:
+            feeds = self.remove_duplicate_articles(feeds)
         #feeds = FeedCollection(feeds)
         self.report_progress(0, _('Trying to download cover...'))
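This new option is what the recipe diffs above are adopting: a recipe just sets ignore_duplicate_articles, and remove_duplicate_articles prunes repeats across sections before the download starts, dropping sections that end up empty when remove_empty_feeds is set. A minimal recipe sketch; the title and feed URLs are placeholders:

    from calibre.web.feeds.news import BasicNewsRecipe

    class DedupExample(BasicNewsRecipe):
        title = u'Dedup Example'
        # drop an article if its title or URL was already seen in any section
        ignore_duplicate_articles = {'title', 'url'}
        remove_empty_feeds = True  # sections emptied by deduplication vanish
        feeds = [
            (u'Section A', u'http://example.com/a.xml'),
            (u'Section B', u'http://example.com/b.xml'),
        ]

Matching is exact: two copies of a story with slightly different headlines in different sections are only caught by the 'url' key.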

View File

@@ -68,7 +68,12 @@ def serialize_collection(mapping_of_recipe_classes):
             key=lambda key: force_unicode(
                 getattr(mapping_of_recipe_classes[key], 'title', 'zzz'),
                 'utf-8')):
-        recipe = serialize_recipe(urn, mapping_of_recipe_classes[urn])
+        try:
+            recipe = serialize_recipe(urn, mapping_of_recipe_classes[urn])
+        except:
+            import traceback
+            traceback.print_exc()
+            continue
         collection.append(recipe)
     collection.set('count', str(len(collection)))
     return etree.tostring(collection, encoding='utf-8', xml_declaration=True,