[merge] from trunk

2025-08-30 23:00:21 -04:00 · 2011-10-11 10:11:22 -07:00 · 2011-10-11 10:11:22 -07:00 · 53bec8004f
commit 53bec8004f
parent 4f0fc544bd bffa00773d
18 changed files with 275 additions and 61 deletions
--- a/recipes/defensenews.recipe
+++ b/recipes/defensenews.recipe
@ -0,0 +1,64 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.defensenews.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class DefenseNews(BasicNewsRecipe):
+    # Recipe for www.defensenews.com, built from the site's per-region and
+    # per-topic RSS feeds listed in ``feeds`` below.
+    title                 = 'Defense News'
+    __author__            = 'Darko Miletic'
+    description           = 'Find late-breaking defense news from the leading defense news weekly'
+    publisher             = 'Gannett Government Media Corporation'
+    category              = 'defense news, defence news, defense, defence, defence budget, defence policy'
+    oldest_article        = 31
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.defensenews.com/images/logo_defensenews2.jpg'
+    extra_css             = """
+                               body{font-family: Arial,Helvetica,sans-serif }
+                               img{margin-bottom: 0.4em; display:block}
+                               .info{font-size: small; color: gray}
+                            """
+
+    # Book metadata, reusing the attribute values defined above.
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags = [
+                     dict(name=['meta','link'])
+                    ,dict(attrs={'class':['toolbar','related','left','right']})
+                  ]
+    # Use explicit dict(...) tag specifications, matching the entries of
+    # remove_tags. A chained ``= attrs={...}`` assignment would also bind a
+    # stray ``attrs`` attribute on the class.
+    remove_tags_before = dict(attrs={'class':'storyWrp'})
+    remove_tags_after  = dict(attrs={'class':'middle'})
+
+    remove_attributes=['lang']
+
+    feeds = [
+              (u'Europe'  , u'http://www.defensenews.com/rss/eur/'            )
+             ,(u'Americas', u'http://www.defensenews.com/rss/ame/'            )
+             ,(u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asi/'  )
+             ,(u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/')
+             ,(u'Air', u'http://www.defensenews.com/rss/air/'                 )
+             ,(u'Land', u'http://www.defensenews.com/rss/lan/'                )
+             ,(u'Naval', u'http://www.defensenews.com/rss/sea/'               )
+            ]
+
+    def preprocess_html(self, soup):
+        '''
+        Strip inline styling and make sure every image has an ``alt``
+        attribute.
+
+        :param soup: parsed article markup; it is modified in place
+        :return: the same ``soup`` object
+        '''
+        # Drop every inline style attribute.
+        for item in soup.findAll(style=True):
+            del item['style']
+        # Give images without alternative text a placeholder value.
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@ -119,10 +119,8 @@ class Guardian(BasicNewsRecipe):
                        }

    def parse_index(self):
-        try:
        feeds = []
        for title, href in self.find_sections():
            feeds.append((title, list(self.find_articles(href))))
        return feeds
-        except:
-            raise NotImplementedError
+
--- a/recipes/merco_press.recipe
+++ b/recipes/merco_press.recipe
@ -0,0 +1,27 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MercoPress(BasicNewsRecipe):
+    # Recipe for the English edition of Merco Press (en.mercopress.com),
+    # with one RSS feed per region/topic.
+    title = u'Merco Press'
+    description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America."
+    cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif'
+
+    __author__ = 'Russell Phillips'
+    language = 'en'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
+    # Drop every <a> element from the downloaded articles.
+    remove_tags = [dict(name='a')]
+
+    # Each entry must be a (title, url) pair.
+    feeds = [('Antarctica', 'http://en.mercopress.com/rss/antarctica'),
+        ('Argentina', 'http://en.mercopress.com/rss/argentina'),
+        ('Brazil', 'http://en.mercopress.com/rss/brazil'),
+        ('Falkland Islands', 'http://en.mercopress.com/rss/falkland-islands'),
+        ('International News', 'http://en.mercopress.com/rss/international'),
+        ('Latin America', 'http://en.mercopress.com/rss/latin-america'),
+        ('Mercosur', 'http://en.mercopress.com/rss/mercosur'),
+        ('Paraguay', 'http://en.mercopress.com/rss/paraguay'),
+        ('United States', 'http://en.mercopress.com/rss/united-states'),
+        ('Uruguay', 'http://en.mercopress.com/rss/uruguay')]
--- a/recipes/penguin_news.recipe
+++ b/recipes/penguin_news.recipe
@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MercoPress(BasicNewsRecipe):
+    # Recipe for Penguin News (www.penguin-news.com).
+    # NOTE(review): the class name matches the Merco Press recipe and looks
+    # like a copy-paste leftover; it is kept so the recipe's identifier does
+    # not change. Confirm whether a rename is wanted.
+    title          = u'Penguin News'
+    description = u"Penguin News: the Falkland Islands' only newspaper."
+    cover_url = 'http://www.penguin-news.com/templates/rt_syndicate_j15/images/logo/light/logo1.png'
+    language = 'en'
+
+    __author__ = 'Russell Phillips'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    extra_css  = 'img{padding-bottom:1ex; display:block; text-align: center;}'
+
+    # The query string uses a plain '&' separator; the HTML entity '&amp;'
+    # is only valid inside markup, not in a URL held in a Python string.
+    feeds = [(u'Penguin News - Falkland Islands', u'http://www.penguin-news.com/index.php?format=feed&type=rss')]
--- a/recipes/wow.recipe
+++ b/recipes/wow.recipe
@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class WoW(BasicNewsRecipe):
+    # Recipe for WoW Insider, fetched from a single RSS feed.
+
+    # Identification
+    __author__ = 'Krittika Goyal'
+    title = u'WoW Insider'
+    language = 'en'
+
+    # Download limits
+    oldest_article = 1  # days
+    max_articles_per_feed = 25
+
+    # Article processing
+    use_embedded_content = False
+    auto_cleanup = True
+    no_stylesheets = True
+
+    # (title, url) pairs
+    feeds = [('WoW', 'http://wow.joystiq.com/rss.xml')]
--- a/setup/build_environment.py
+++ b/setup/build_environment.py
@ -224,6 +224,9 @@ try:
 except:
    try:
        HOST=get_ip_address('wlan0')
+    except:
+        try:
+            HOST=get_ip_address('ppp0')
        except:
            HOST='192.168.1.2'

--- a/setup/installer/init.py
+++ b/setup/installer/init.py
@ -20,17 +20,23 @@ for x in [
    EXCLUDES.extend(['--exclude', x])
 SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]

+def get_rsync_pw():
+    '''
+    Return the rsync password read from the buildbot configuration file.
+
+    The file content is split at the first ``:``; everything after it, with
+    surrounding whitespace stripped, is returned. If the file contains no
+    ``:`` the result is an empty string.
+
+    :return: the password string
+    :raises IOError: if the configuration file cannot be opened
+    '''
+    # Close the handle deterministically instead of leaving it to the
+    # garbage collector.
+    with open('/home/kovid/work/kde/conf/buildbot') as f:
+        return f.read().partition(':')[-1].strip()
+
 class Rsync(Command):

    description = 'Sync source tree from development machine'

    SYNC_CMD = ' '.join(BASE_RSYNC+SAFE_EXCLUDES+
-            ['rsync://{host}/work/{project}', '..'])
+            ['rsync://buildbot@{host}/work/{project}', '..'])

    def run(self, opts):
        cmd = self.SYNC_CMD.format(host=HOST, project=PROJECT)
+        env = dict(os.environ)
+        env['RSYNC_PASSWORD'] = get_rsync_pw()
        self.info(cmd)
-        subprocess.check_call(cmd, shell=True)
+        subprocess.check_call(cmd, shell=True, env=env)


 class Push(Command):
@ -81,7 +87,8 @@ class VMInstaller(Command):


    def get_build_script(self):
-        ans = '\n'.join(self.BUILD_PREFIX)+'\n\n'
+        rs = ['export RSYNC_PASSWORD=%s'%get_rsync_pw()]
+        ans = '\n'.join(self.BUILD_PREFIX + rs)+'\n\n'
        ans += ' && \\\n'.join(self.BUILD_RSYNC) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_CLEAN) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_BUILD) + ' && \\\n'
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1144,6 +1144,16 @@ class StoreAmazonDEKindleStore(StoreBase):
    formats = ['KINDLE']
    affiliate = True

+class StoreAmazonFRKindleStore(StoreBase):
+    # Store plugin descriptor for the French Amazon Kindle store; the
+    # implementation lives in the module named by ``actual_plugin``.
+    name = 'Amazon FR Kindle'
+    author = 'Charles Haley'
+    description = u'Tous les ebooks Kindle'
+    actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore'
+
+    # This is the French store, so it must not carry the German store's
+    # country code.
+    headquarters = 'FR'
+    formats = ['KINDLE']
+    affiliate = True
+
 class StoreAmazonUKKindleStore(StoreBase):
    name = 'Amazon UK Kindle'
    author = 'Charles Haley'
@ -1521,6 +1531,7 @@ plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
    StoreAmazonDEKindleStore,
+    StoreAmazonFRKindleStore,
    StoreAmazonUKKindleStore,
    StoreBaenWebScriptionStore,
    StoreBNStore,
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -518,3 +518,9 @@ class BookList(list):
        '''
        raise NotImplementedError()

+    def prepare_addable_books(self, paths):
+        '''
+        Given a list of paths, returns another list of paths. These paths
+        point to addable versions of the books.
+
+        This default implementation does no conversion and returns the
+        ``paths`` argument unchanged.
+
+        :param paths: list of paths to book files
+        :return: list of paths to addable versions of those books
+        '''
+        # NOTE(review): the GUI calls this on the device object
+        # (device_manager.device) -- confirm this definition ends up on the
+        # device plugin class rather than on BookList.
+        return paths
--- a/src/calibre/devices/prst1/driver.py
+++ b/src/calibre/devices/prst1/driver.py
@ -47,6 +47,9 @@ class PRST1(USBMS):
    WINDOWS_MAIN_MEM   = re.compile(
            r'(PRS-T1&)'
            )
+    WINDOWS_CARD_A_MEM = re.compile(
+            r'(PRS-T1__SD&)'
+            )
    MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'

@ -253,8 +256,11 @@ class PRST1(USBMS):

            # Get Metadata We Want
            lpath = book.lpath
+            try:
                author = newmi.authors[0]
-            title = newmi.title
+            except:
+                author = _('Unknown')
+            title = newmi.title or _('Unknown')

            if lpath not in db_books:
                query = '''
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@ -397,6 +397,7 @@ class AddAction(InterfaceAction):
            d = error_dialog(self.gui, _('Add to library'), _('No book files found'))
            d.exec_()
            return
+        paths = self.gui.device_manager.device.prepare_addable_books(paths)
        from calibre.gui2.add import Adder
        self.__adder_func = partial(self._add_from_device_adder, on_card=None,
                                                    model=view.model())
--- a/src/calibre/gui2/preferences/server.ui
+++ b/src/calibre/gui2/preferences/server.ui
@ -206,7 +206,7 @@
   <item>
    <widget class="QCheckBox" name="opt_autolaunch_server">
     <property name="text">
-      <string>Run server &amp;automatically on startup</string>
+      <string>Run server &amp;automatically when calibre starts</string>
     </property>
    </widget>
   </item>
--- a/src/calibre/gui2/search_restriction_mixin.py
+++ b/src/calibre/gui2/search_restriction_mixin.py
@ -37,6 +37,7 @@ class SearchRestrictionMixin(object):
        search = unicode(search)
        if not search:
            self.search_restriction.setCurrentIndex(0)
+            self._apply_search_restriction('')
        else:
            s = '*' + search
            if self.search_restriction.count() > 1:
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import urllib
 from contextlib import closing

 from lxml import html
@ -37,27 +36,16 @@ class AmazonDEKindleStore(StorePlugin):

    def search(self, query, max_results=10, timeout=60):
        search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url =  search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))

-            # Amazon has two results pages.
-            # 20110725: seems that is_shot is gone.
-#            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-#            # Horizontal grid of books.
-#            if is_shot:
-#                data_xpath = '//div[contains(@class, "result")]'
-#                format_xpath = './/div[@class="productTitle"]/text()'
-#                cover_xpath = './/div[@class="productTitle"]//img/@src'
-#            # Vertical list of books.
-#            else:
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'
-# end is_shot else

            for data in doc.xpath(data_xpath):
                if counter <= 0:
@ -80,11 +68,9 @@ class AmazonDEKindleStore(StorePlugin):
                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))

-#                if is_shot:
-#                    author = format.split(' von ')[-1]
-#                else:
                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('von ')[-1]
+                if author.startswith('von '):
+                    author = author[4:]

                counter -= 1

--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.search_result import SearchResult
+
+class AmazonFRKindleStore(StorePlugin):
+    '''
+    For comments on the implementation, please see amazon_plugin.py
+    '''
+
+    def open(self, parent=None, detail_item=None, external=False):
+        '''
+        Open the store in the browser via ``open_url``.
+
+        With ``detail_item`` (an ASIN) the product page is opened through
+        the redirect link; otherwise the Kindle books landing page is
+        opened. Both links carry the affiliate tag.
+        '''
+        params = {'tag': 'charhale-21'}
+        if detail_item:
+            params['asin'] = detail_item
+            link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % params
+        else:
+            link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % params
+        open_url(QUrl(link))
+
+    def search(self, query, max_results=10, timeout=60):
+        '''
+        Search the amazon.fr Kindle store and yield ``SearchResult``
+        objects, at most ``max_results`` of them.
+
+        :param query: the search text
+        :param max_results: upper bound on the number of results yielded
+        :param timeout: passed to the browser when opening the search URL
+        '''
+        base = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
+        # Hand-rolled escaping: '%' -> '%25', '\xNN' escapes -> '%NN',
+        # spaces -> '+'.
+        # NOTE(review): characters above U+00FF come out of
+        # 'backslashreplace' as \uXXXX and are not converted by this chain
+        # -- confirm whether that matters.
+        escaped = query.encode('ascii', 'backslashreplace')
+        escaped = escaped.replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        url = base + escaped
+        br = browser()
+
+        remaining = max_results
+        with closing(br.open(url, timeout=timeout)) as f:
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+
+            for node in doc.xpath('//div[contains(@class, "result") and contains(@class, "product")]'):
+                if remaining <= 0:
+                    break
+
+                # The digital-text search still returns entries that are
+                # not Kindle books (author pages, for instance); skip
+                # anything whose format label does not mention Kindle.
+                kind = ''.join(node.xpath('.//span[@class="format"]/text()'))
+                if 'kindle' not in kind.lower():
+                    continue
+
+                # Drop the leading 'de ' in front of the author name.
+                author = unicode(''.join(node.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
+                if author.startswith('de '):
+                    author = author[3:]
+
+                remaining -= 1
+
+                res = SearchResult()
+                res.cover_url = ''.join(node.xpath('.//img[@class="productImage"]/@src')).strip()
+                res.title = ''.join(node.xpath('.//div[@class="title"]/a/text()')).strip()
+                res.author = author.strip()
+                res.price = ''.join(node.xpath('.//div[@class="newPrice"]/span/text()')).strip()
+                # The ASIN is what is used to reference the book later.
+                res.detail_item = ''.join(node.xpath("@name")).strip()
+                res.formats = 'Kindle'
+                res.drm = SearchResult.DRM_UNKNOWN
+
+                yield res
--- a/src/calibre/gui2/store/stores/amazon_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_plugin.py
@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'

 import random
 import re
-import urllib
 from contextlib import closing

 from lxml import html
@ -122,12 +121,12 @@ class AmazonKindleStore(StorePlugin):
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
-        url =  self.search_url + urllib.quote_plus(query)
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            
            # Amazon has two results pages.
            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import urllib
 from contextlib import closing

 from lxml import html
@ -34,27 +33,16 @@ class AmazonUKKindleStore(StorePlugin):

    def search(self, query, max_results=10, timeout=60):
        search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url =  search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))

-            # Amazon has two results pages.
-            # 20110725: seems that is_shot is gone.
-#            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-#            # Horizontal grid of books.
-#            if is_shot:
-#                data_xpath = '//div[contains(@class, "result")]'
-#                format_xpath = './/div[@class="productTitle"]/text()'
-#                cover_xpath = './/div[@class="productTitle"]//img/@src'
-#            # Vertical list of books.
-#            else:
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'
-# end is_shot else

            for data in doc.xpath(data_xpath):
                if counter <= 0:
@ -77,11 +65,9 @@ class AmazonUKKindleStore(StorePlugin):
                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))

-#                if is_shot:
-#                    author = format.split(' von ')[-1]
-#                else:
                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('by ')[-1]
+                if author.startswith('by '):
+                    author = author[3:]

                counter -= 1

--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -47,6 +47,9 @@ def get_parser(usage):
 def get_db(dbpath, options):
    if options.library_path is not None:
        dbpath = options.library_path
+    if dbpath is None:
+        raise ValueError('No saved library path, either run the GUI or use the'
+                ' --with-library option')
    dbpath = os.path.abspath(dbpath)
    return LibraryDatabase2(dbpath)