Merge from trunk

2025-11-07 15:23:13 -05:00 · 2011-05-21 19:35:43 +01:00 · 2011-05-21 19:35:43 +01:00 · 45fcacf2ec
commit 45fcacf2ec
parent 994974fb59 224968f239
6 changed files with 560 additions and 385 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -34,7 +34,7 @@

    - title: "Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name."

-    - title: "Get Books: Store alst few searches in history"
+    - title: "Get Books: Store last few searches in history"
  
  bug fixes:
    - title: "Fix a crash when a device is connected/disconnected while a modal dialog opened from the toolbar is visible"
--- a/recipes/focus_de.recipe
+++ b/recipes/focus_de.recipe
@ -0,0 +1,48 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+# Recipe subclassing BasicNewsRecipe for 'Focus (DE)': German ('de') feeds served from rss2.focus.de.
+class AdvancedUserRecipe1305567197(BasicNewsRecipe):
+    title          = u'Focus (DE)'
+    __author__  = 'Anonymous'
+    language = 'de'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets         = True
+    use_embedded_content   = False
+    remove_javascript      = True
+    # Returns the URL with '?drucken=1' appended. NOTE(review): presumably the print view; assumes url has no query string yet - confirm.
+    def print_version(self, url):
+        return url + '?drucken=1'
+    # Tag spec matching <div id="article">. NOTE(review): per the attribute name, only this part of the page is kept - confirm.
+    keep_only_tags = [
+                              dict(name='div', attrs={'id':['article']}) ]
+    # Specs for <div> elements, matched by class attribute, that are to be removed (per the attribute name).
+    remove_tags = [dict(name='div', attrs={'class':'sidebar'}),
+                            dict(name='div', attrs={'class':'commentForm'}),
+                            dict(name='div', attrs={'class':'comment clearfix oid-3534591 open'}),
+                            dict(name='div', attrs={'class':'similarityBlock'}),
+                            dict(name='div', attrs={'class':'footer'}),
+                            dict(name='div', attrs={'class':'getMoreComments'}),
+                            dict(name='div', attrs={'class':'moreComments'}),
+                            dict(name='div', attrs={'class':'ads'}),
+                            dict(name='div', attrs={'class':'articleContent'}),
+                            # NOTE(review): 'comment clearfix oid-3534591 open' embeds what looks like one object id - confirm it matches generally.
+
+                            ]
+    remove_tags_after = [
+                            dict(name='div',attrs={'class':['commentForm','title', 'actions clearfix']})
+                                   ]
+    # (feed title, RSS URL) pairs, one per news section.
+
+    feeds          = [	(u'Eilmeldungen', u'http://rss2.focus.de/c/32191/f/533875/index.rss'),
+                                        (u'Auto-News', u'http://rss2.focus.de/c/32191/f/443320/index.rss'),
+                                        (u'Digital-News', u'http://rss2.focus.de/c/32191/f/443315/index.rss'),
+                                        (u'Finanzen-News', u'http://rss2.focus.de/c/32191/f/443317/index.rss'),
+                                        (u'Gesundheit-News', u'http://rss2.focus.de/c/32191/f/443314/index.rss'),
+                                        (u'Immobilien-News', u'http://rss2.focus.de/c/32191/f/443318/index.rss'),
+                                        (u'Kultur-News', u'http://rss2.focus.de/c/32191/f/443321/index.rss'),
+		(u'Panorama-News', u'http://rss2.focus.de/c/32191/f/533877/index.rss'),
+                                        (u'Politik-News', u'http://rss2.focus.de/c/32191/f/443313/index.rss'),
+                                        (u'Reisen-News', u'http://rss2.focus.de/c/32191/f/443316/index.rss'),
+                                        (u'Sport-News', u'http://rss2.focus.de/c/32191/f/443319/index.rss'),
+                                        (u'Wissen-News', u'http://rss2.focus.de/c/32191/f/533876/index.rss'),
+                         ]
--- a/recipes/national_geographic_de.recipe
+++ b/recipes/national_geographic_de.recipe
@ -0,0 +1,25 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+# Recipe subclassing BasicNewsRecipe for 'National Geographic (DE)': one German ('de') feed from feeds.nationalgeographic.de.
+class AdvancedUserRecipe1305567197(BasicNewsRecipe):
+    title          = u'National Geographic (DE)'
+    __author__  = 'Anonymous'
+    language = 'de'
+    oldest_article = 7
+    max_articles_per_feed = 1000
+    no_stylesheets         = True
+    use_embedded_content   = False
+    remove_javascript      = True
+    cover_url = 'http://www.nationalgeographic.de/images/national-geographic-logo.jpg'
+    keep_only_tags = [
+                              dict(name='div', attrs={'class':['contentbox_no_top_border']}) ]
+    # Specs for <div>/<li> elements, matched by class attribute, that are to be removed (per the attribute name).
+    remove_tags =  [
+                            dict(name='div', attrs={'class':'related'}),
+                            dict(name='li', attrs={'class':'first'}),
+                            dict(name='div', attrs={'class':'extrasbox_inner'}),
+
+                            ]
+    # A single (feed title, feed URL) pair.
+    feeds          = [  (u'National Geographic', u'http://feeds.nationalgeographic.de/ng-neueste-artikel'),
+
+        ]
--- a/src/calibre/gui2/store/amazon_plugin.py
+++ b/src/calibre/gui2/store/amazon_plugin.py
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

 import random
 import re
-import urllib2
+import urllib
 from contextlib import closing

 from lxml import html
@ -22,7 +22,7 @@ from calibre.gui2.store.search_result import SearchResult

 class AmazonKindleStore(StorePlugin):

-    search_url = 'http://www.amazon.com/s/url=search-alias%3Ddigital-text&field-keywords='
+    search_url = 'http://www.amazon.com/s/?url=search-alias%3Ddigital-text&field-keywords='
    details_url = 'http://amazon.com/dp/'
    drm_search_text = u'Simultaneous Device Usage'
    drm_free_text = u'Unlimited'
@ -122,13 +122,27 @@ class AmazonKindleStore(StorePlugin):
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
-        url =  self.search_url + urllib2.quote(query)
+        url =  self.search_url + urllib.quote_plus(query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
-            for data in doc.xpath('//div[@class="productData"]'):
+            
+            # Amazon has two results pages.
+            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
+            # Horizontal grid of books.
+            if is_shot:
+                data_xpath = '//div[contains(@class, "result")]'
+                format_xpath = './/div[@class="productTitle"]/text()'
+                cover_xpath = './/div[@class="productTitle"]//img/@src'
+            # Vertical list of books.
+            else:
+                data_xpath = '//div[@class="productData"]'
+                format_xpath = './/span[@class="format"]/text()'
+                cover_xpath = '../div[@class="productImage"]/a/img/@src'
+            
+            for data in doc.xpath(data_xpath):
                if counter <= 0:
                    break
                
@ -136,14 +150,14 @@ class AmazonKindleStore(StorePlugin):
                # put in results for non Kindle books (author pages). So we need
                # to explicitly check if the item is a Kindle book and ignore it
                # if it isn't.
-                type = ''.join(data.xpath('//span[@class="format"]/text()'))
-                if 'kindle' not in type.lower():
+                format = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format.lower():
                    continue
                
                # We must have an asin otherwise we can't easily reference the
                # book later.
                asin_href = None
-                asin_a = data.xpath('div[@class="productTitle"]/a[1]')
+                asin_a = data.xpath('.//div[@class="productTitle"]/a[1]')
                if asin_a:
                    asin_href = asin_a[0].get('href', '')
                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
@ -154,28 +168,21 @@ class AmazonKindleStore(StorePlugin):
                else:
                    continue
                
-                cover_url = ''
-                if asin_href:
-                    cover_img = data.xpath('//div[@class="productImage"]/a[@href="%s"]/img/@src' % asin_href)
-                    if cover_img:
-                        cover_url = cover_img[0]
-                        parts = cover_url.split('/')
-                        bn = parts[-1]
-                        f, _, ext = bn.rpartition('.')
-                        if '_' in f:
-                            bn = f.partition('_')[0]+'_SL160_.'+ext
-                            parts[-1] = bn
-                            cover_url = '/'.join(parts)
+                cover_url = ''.join(data.xpath(cover_xpath))

-                title = ''.join(data.xpath('div[@class="productTitle"]/a/text()'))
-                author = ''.join(data.xpath('div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
-                author = author.split('by')[-1]
-                price = ''.join(data.xpath('div[@class="newPrice"]/span/text()'))
+                title = ''.join(data.xpath('.//div[@class="productTitle"]/a/text()'))
+                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
+                
+                if is_shot:
+                    author = format.split(' by ')[-1]
+                else:
+                    author = ''.join(data.xpath('.//div[@class="productTitle"]/span[@class="ptBrand"]/text()'))
+                    author = author.split(' by ')[-1]
                
                counter -= 1
    
                s = SearchResult()
-                s.cover_url = cover_url
+                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
--- a/src/calibre/ptempfile.py
+++ b/src/calibre/ptempfile.py
@ -39,8 +39,20 @@ def base_dir():
        if td and os.path.exists(td):
            _base_dir = td
        else:
-            _base_dir = tempfile.mkdtemp(prefix='%s_%s_tmp_'%(__appname__,
-                __version__), dir=os.environ.get('CALIBRE_TEMP_DIR', None))
+            base = os.environ.get('CALIBRE_TEMP_DIR', None)
+            prefix = u'%s_%s_tmp_'%(__appname__, __version__)
+            try:
+                # First try an ascii path as that is what was done historically
+                # and we don't want to break working code
+                # _base_dir will be a bytestring
+                _base_dir = tempfile.mkdtemp(prefix=prefix.encode('ascii'), dir=base)
+            except:
+                # Failed to create tempdir (probably localized windows)
+                # Try unicode. This means that all temp paths created by this
+                # module will be unicode, this may cause problems elsewhere, if
+                # so, hopefully people will open tickets and they can be fixed.
+                _base_dir = tempfile.mkdtemp(prefix=prefix, dir=base)
+
            atexit.register(remove_dir, _base_dir)
    return _base_dir

--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot