[merge] from trunk

2025-07-09 03:04:10 -04:00 · 2011-10-11 10:11:22 -07:00 · 2011-10-11 10:11:22 -07:00 · 53bec8004f
commit 53bec8004f
parent 4f0fc544bd bffa00773d
18 changed files with 275 additions and 61 deletions
--- a/recipes/defensenews.recipe
+++ b/recipes/defensenews.recipe
@ -0,0 +1,64 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.defensenews.com
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class DefenseNews(BasicNewsRecipe):
    '''
    Recipe for www.defensenews.com: downloads the regional and topical RSS
    feeds listed in ``feeds`` and cleans up each article page.
    '''
    title                 = 'Defense News'
    __author__            = 'Darko Miletic'
    description           = 'Find late-breaking defense news from the leading defense news weekly'
    publisher             = 'Gannett Government Media Corporation'
    category              = 'defense news, defence news, defense, defence, defence budget, defence policy'
    oldest_article        = 31
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.defensenews.com/images/logo_defensenews2.jpg'
    extra_css             = """
                               body{font-family: Arial,Helvetica,sans-serif }
                               img{margin-bottom: 0.4em; display:block}
                               .info{font-size: small; color: gray}
                            """
    # Metadata handed to the conversion pipeline; reuses the values above.
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }
    remove_tags = [
                     dict(name=['meta','link'])
                    ,dict(attrs={'class':['toolbar','related','left','right']})
                  ]
    # Written as explicit tag specs, in the same dict(attrs=...) form as the
    # remove_tags entries, so no helper name leaks into the class namespace.
    remove_tags_before = dict(attrs={'class':'storyWrp'})
    remove_tags_after  = dict(attrs={'class':'middle'})
    remove_attributes=['lang']
    # (section title, RSS url) pairs
    feeds = [
              (u'Europe'  , u'http://www.defensenews.com/rss/eur/'            )
             ,(u'Americas', u'http://www.defensenews.com/rss/ame/'            )
             ,(u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asi/'  )
             ,(u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/')
             ,(u'Air', u'http://www.defensenews.com/rss/air/'                 )
             ,(u'Land', u'http://www.defensenews.com/rss/lan/'                )
             ,(u'Naval', u'http://www.defensenews.com/rss/sea/'               )
            ]
    def preprocess_html(self, soup):
        '''
        Strip inline ``style`` attributes from every tag that has one and
        give every ``img`` lacking an ``alt`` attribute the placeholder
        text 'image'.

        :param soup: parsed article page
        :return: the same soup object, modified in place
        '''
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@ -119,10 +119,8 @@ class Guardian(BasicNewsRecipe):
                        }
    def parse_index(self):
        try:
        feeds = []
        for title, href in self.find_sections():
            feeds.append((title, list(self.find_articles(href))))
        return feeds
-        except:
+
            raise NotImplementedError
--- a/recipes/merco_press.recipe
+++ b/recipes/merco_press.recipe
@ -0,0 +1,27 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class MercoPress(BasicNewsRecipe):
    '''
    Recipe for en.mercopress.com: one RSS feed per region/topic, with
    automatic article cleanup enabled.
    '''
    title = u'Merco Press'
    description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America."
    cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif'
    __author__ = 'Russell Phillips'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
    # Drop all anchor tags from the downloaded articles.
    remove_tags = [dict(name='a')]
    # Every entry must be a (title, url) pair.
    feeds = [('Antarctica', 'http://en.mercopress.com/rss/antarctica'),
        ('Argentina', 'http://en.mercopress.com/rss/argentina'),
        ('Brazil', 'http://en.mercopress.com/rss/brazil'),
        ('Falkland Islands', 'http://en.mercopress.com/rss/falkland-islands'),
        ('International News', 'http://en.mercopress.com/rss/international'),
        ('Latin America', 'http://en.mercopress.com/rss/latin-america'),
        ('Mercosur', 'http://en.mercopress.com/rss/mercosur'),
        ('Paraguay', 'http://en.mercopress.com/rss/paraguay'),
        ('United States', 'http://en.mercopress.com/rss/united-states'),
        ('Uruguay', 'http://en.mercopress.com/rss/uruguay')]
--- a/recipes/penguin_news.recipe
+++ b/recipes/penguin_news.recipe
@ -0,0 +1,17 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 # NOTE(review): the class name is the same as the one used in
 # merco_press.recipe; it is kept unchanged here because it is this
 # block's public identifier -- confirm whether a rename is wanted.
 class MercoPress(BasicNewsRecipe):
    '''
    Recipe for Penguin News (www.penguin-news.com), fetched from the
    site's single RSS feed with automatic article cleanup enabled.
    '''
    title          = u'Penguin News'
    description = u"Penguin News: the Falkland Islands' only newspaper."
    cover_url = 'http://www.penguin-news.com/templates/rt_syndicate_j15/images/logo/light/logo1.png'
    language = 'en'
    __author__ = 'Russell Phillips'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    extra_css  = 'img{padding-bottom:1ex; display:block; text-align: center;}'
    # The query string uses a plain '&': an HTML-escaped '&amp;' inside a
    # Python string would be sent literally and turn the second parameter
    # name into 'amp;type'.
    feeds = [(u'Penguin News - Falkland Islands', u'http://www.penguin-news.com/index.php?format=feed&type=rss')]
--- a/recipes/wow.recipe
+++ b/recipes/wow.recipe
@ -0,0 +1,17 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class WoW(BasicNewsRecipe):
    '''Recipe for WoW Insider, read from the wow.joystiq.com RSS feed.'''
    # Identification
    title = u'WoW Insider'
    __author__ = 'Krittika Goyal'
    language = 'en'
    # Download limits
    oldest_article = 1  # days
    max_articles_per_feed = 25
    # Page handling: follow the feed links and auto-clean the pages
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True
    # (title, url) pairs
    feeds = [('WoW', 'http://wow.joystiq.com/rss.xml')]
--- a/setup/build_environment.py
+++ b/setup/build_environment.py
@ -224,6 +224,9 @@ try:
 except:
    try:
        HOST=get_ip_address('wlan0')
    except:
        try:
            HOST=get_ip_address('ppp0')
        except:
            HOST='192.168.1.2'
--- a/setup/installer/init.py
+++ b/setup/installer/init.py
@ -20,17 +20,23 @@ for x in [
    EXCLUDES.extend(['--exclude', x])
 SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
 def get_rsync_pw():
    '''
    Return the rsync password read from the buildbot configuration file.

    The whole file is read; the text after the first ':' is returned with
    surrounding whitespace stripped. If the file contains no ':' the
    result is an empty string.

    :raises IOError: if the file cannot be opened or read.
    '''
    # Use a context manager so the file handle is closed as soon as the
    # contents have been read instead of lingering until garbage collection.
    with open('/home/kovid/work/kde/conf/buildbot') as f:
        return f.read().partition(':')[-1].strip()
 class Rsync(Command):
    description = 'Sync source tree from development machine'
    SYNC_CMD = ' '.join(BASE_RSYNC+SAFE_EXCLUDES+
-            ['rsync://{host}/work/{project}', '..'])
+            ['rsync://buildbot@{host}/work/{project}', '..'])
    def run(self, opts):
        cmd = self.SYNC_CMD.format(host=HOST, project=PROJECT)
        env = dict(os.environ)
        env['RSYNC_PASSWORD'] = get_rsync_pw()
        self.info(cmd)
-        subprocess.check_call(cmd, shell=True)
+        subprocess.check_call(cmd, shell=True, env=env)
 class Push(Command):
@ -81,7 +87,8 @@ class VMInstaller(Command):
    def get_build_script(self):
-        ans = '\n'.join(self.BUILD_PREFIX)+'\n\n'
+        rs = ['export RSYNC_PASSWORD=%s'%get_rsync_pw()]
        ans = '\n'.join(self.BUILD_PREFIX + rs)+'\n\n'
        ans += ' && \\\n'.join(self.BUILD_RSYNC) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_CLEAN) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_BUILD) + ' && \\\n'
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1144,6 +1144,16 @@ class StoreAmazonDEKindleStore(StoreBase):
    formats = ['KINDLE']
    affiliate = True
 class StoreAmazonFRKindleStore(StoreBase):
    # Registration entry for the Amazon FR Kindle store plugin; the real
    # implementation is named by actual_plugin.
    name = 'Amazon FR Kindle'
    description = u'Tous les ebooks Kindle'
    author = 'Charles Haley'
    actual_plugin = (
        'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore')
    affiliate = True
    formats = ['KINDLE']
    # NOTE(review): 'DE' on the FR store may be a copy-paste leftover or may
    # be intentional -- confirm before changing.
    headquarters = 'DE'
 class StoreAmazonUKKindleStore(StoreBase):
    name = 'Amazon UK Kindle'
    author = 'Charles Haley'
@ -1521,6 +1531,7 @@ plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
    StoreAmazonDEKindleStore,
    StoreAmazonFRKindleStore,
    StoreAmazonUKKindleStore,
    StoreBaenWebScriptionStore,
    StoreBNStore,
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -518,3 +518,9 @@ class BookList(list):
        '''
        raise NotImplementedError()
    def prepare_addable_books(self, paths):
        '''
        Map a list of book paths to paths of versions that can be added.

        This implementation performs no conversion: it hands back the very
        same list it was given.

        :param paths: list of paths
        :return: ``paths``, unchanged
        '''
        return paths
--- a/src/calibre/devices/prst1/driver.py
+++ b/src/calibre/devices/prst1/driver.py
@ -47,6 +47,9 @@ class PRST1(USBMS):
    WINDOWS_MAIN_MEM   = re.compile(
            r'(PRS-T1&)'
            )
    WINDOWS_CARD_A_MEM = re.compile(
            r'(PRS-T1__SD&)'
            )
    MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'
@ -253,8 +256,11 @@ class PRST1(USBMS):
            # Get Metadata We Want
            lpath = book.lpath
            try:
                author = newmi.authors[0]
-            title = newmi.title
+            except:
                author = _('Unknown')
            title = newmi.title or _('Unknown')
            if lpath not in db_books:
                query = '''
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@ -397,6 +397,7 @@ class AddAction(InterfaceAction):
            d = error_dialog(self.gui, _('Add to library'), _('No book files found'))
            d.exec_()
            return
        paths = self.gui.device_manager.device.prepare_addable_books(paths)
        from calibre.gui2.add import Adder
        self.__adder_func = partial(self._add_from_device_adder, on_card=None,
                                                    model=view.model())
--- a/src/calibre/gui2/preferences/server.ui
+++ b/src/calibre/gui2/preferences/server.ui
@ -206,7 +206,7 @@
   <item>
    <widget class="QCheckBox" name="opt_autolaunch_server">
     <property name="text">
-      <string>Run server &amp;automatically on startup</string>
+      <string>Run server &amp;automatically when calibre starts</string>
     </property>
    </widget>
   </item>
--- a/src/calibre/gui2/search_restriction_mixin.py
+++ b/src/calibre/gui2/search_restriction_mixin.py
@ -37,6 +37,7 @@ class SearchRestrictionMixin(object):
        search = unicode(search)
        if not search:
            self.search_restriction.setCurrentIndex(0)
            self._apply_search_restriction('')
        else:
            s = '*' + search
            if self.search_restriction.count() > 1:
--- a/src/calibre/gui2/store/stores/amazon_de_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import urllib
 from contextlib import closing
 from lxml import html
@ -37,27 +36,16 @@ class AmazonDEKindleStore(StorePlugin):
    def search(self, query, max_results=10, timeout=60):
        search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url =  search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            # Amazon has two results pages.
            # 20110725: seems that is_shot is gone.
 #            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
 #            # Horizontal grid of books.
 #            if is_shot:
 #                data_xpath = '//div[contains(@class, "result")]'
 #                format_xpath = './/div[@class="productTitle"]/text()'
 #                cover_xpath = './/div[@class="productTitle"]//img/@src'
 #            # Vertical list of books.
 #            else:
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'
 # end is_shot else
            for data in doc.xpath(data_xpath):
                if counter <= 0:
@ -80,11 +68,9 @@ class AmazonDEKindleStore(StorePlugin):
                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
 #                if is_shot:
 #                    author = format.split(' von ')[-1]
 #                else:
                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('von ')[-1]
+                if author.startswith('von '):
                    author = author[4:]
                counter -= 1
--- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py
@ -0,0 +1,82 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from contextlib import closing
 from lxml import html
 from PyQt4.Qt import QUrl
 from calibre import browser
 from calibre.gui2 import open_url
 from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
 class AmazonFRKindleStore(StorePlugin):
    '''
    Store plugin for the Kindle store on amazon.fr.

    For comments on the implementation, please see amazon_plugin.py
    '''
    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store through open_url, tagging the link with the
        affiliate id.

        When ``detail_item`` (an ASIN) is given the link goes to that
        product via Amazon's redirect page, otherwise to the Kindle store
        page. ``parent`` and ``external`` are accepted but not used here.
        '''
        params = {'tag': 'charhale-21'}
        if detail_item:
            params['asin'] = detail_item
            link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % params
        else:
            link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % params
        open_url(QUrl(link))
    def search(self, query, max_results=10, timeout=60):
        '''
        Generator that searches amazon.fr for ``query`` and yields one
        SearchResult per Kindle book found, stopping after ``max_results``
        results. ``timeout`` is passed on to the browser's open call.
        '''
        base = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
        # Hand-rolled escaping: non-ASCII characters become backslash
        # escapes, literal '%' is protected, then '\x' escapes are turned
        # into percent escapes and spaces into '+'.
        escaped = query.encode('ascii', 'backslashreplace')
        escaped = escaped.replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        url = base + escaped
        fetcher = browser()
        remaining = max_results
        with closing(fetcher.open(url, timeout=timeout)) as response:
            page = html.fromstring(response.read().decode('latin-1', 'replace'))
            for node in page.xpath('//div[contains(@class, "result") and contains(@class, "product")]'):
                if remaining <= 0:
                    break
                # The digital-text search still returns entries that are
                # not Kindle books (author pages), so anything whose
                # format text does not mention Kindle is skipped.
                kind = ''.join(node.xpath('.//span[@class="format"]/text()'))
                if 'kindle' not in kind.lower():
                    continue
                # The ASIN is what identifies the book later on.
                asin = ''.join(node.xpath("@name"))
                cover_url = ''.join(node.xpath('.//img[@class="productImage"]/@src'))
                title = ''.join(node.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(node.xpath('.//div[@class="newPrice"]/span/text()'))
                author = unicode(''.join(node.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                # Drop the leading 'de ' that precedes the author name.
                if author.startswith('de '):
                    author = author[3:]
                remaining -= 1
                result = SearchResult()
                result.cover_url = cover_url.strip()
                result.title = title.strip()
                result.author = author.strip()
                result.price = price.strip()
                result.detail_item = asin.strip()
                result.formats = 'Kindle'
                result.drm = SearchResult.DRM_UNKNOWN
                yield result
--- a/src/calibre/gui2/store/stores/amazon_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_plugin.py
@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 import random
 import re
 import urllib
 from contextlib import closing
 from lxml import html
@ -122,12 +121,12 @@ class AmazonKindleStore(StorePlugin):
        open_url(QUrl(store_link))
    def search(self, query, max_results=10, timeout=60):
-        url =  self.search_url + urllib.quote_plus(query)
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            # Amazon has two results pages.
            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
--- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py
+++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import urllib
 from contextlib import closing
 from lxml import html
@ -34,27 +33,16 @@ class AmazonUKKindleStore(StorePlugin):
    def search(self, query, max_results=10, timeout=60):
        search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url =  search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
            # Amazon has two results pages.
            # 20110725: seems that is_shot is gone.
 #            is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
 #            # Horizontal grid of books.
 #            if is_shot:
 #                data_xpath = '//div[contains(@class, "result")]'
 #                format_xpath = './/div[@class="productTitle"]/text()'
 #                cover_xpath = './/div[@class="productTitle"]//img/@src'
 #            # Vertical list of books.
 #            else:
            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'
 # end is_shot else
            for data in doc.xpath(data_xpath):
                if counter <= 0:
@ -77,11 +65,9 @@ class AmazonUKKindleStore(StorePlugin):
                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
 #                if is_shot:
 #                    author = format.split(' von ')[-1]
 #                else:
                author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('by ')[-1]
+                if author.startswith('by '):
                    author = author[3:]
                counter -= 1
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -47,6 +47,9 @@ def get_parser(usage):
 def get_db(dbpath, options):
    '''
    Open the library database.

    ``options.library_path`` takes precedence over ``dbpath`` when it is
    not None. The chosen path is made absolute before being handed to
    LibraryDatabase2.

    :raises ValueError: if neither ``options.library_path`` nor ``dbpath``
        provides a path.
    '''
    chosen = options.library_path
    if chosen is None:
        chosen = dbpath
    if chosen is None:
        raise ValueError('No saved library path, either run the GUI or use the'
                ' --with-library option')
    return LibraryDatabase2(os.path.abspath(chosen))