Sync to trunk.

2025-08-05 08:40:13 -04:00 · 2009-06-08 18:35:12 -04:00 · 2009-06-08 18:35:12 -04:00 · 17e95213ad
commit 17e95213ad
parent dcaeca5d1c 9266cfb0a5
16 changed files with 364 additions and 36 deletions
--- a/installer/osx/freeze.py
+++ b/installer/osx/freeze.py
@ -19,7 +19,7 @@ from modulegraph.find_modules import find_modules
 PYTHON = '/Library/Frameworks/Python.framework/Versions/Current/bin/python'

 class BuildAPP(py2app):
-    QT_PREFIX = '/Users/kovid/qt'
+    QT_PREFIX = '/Volumes/sw/qt'
    LOADER_TEMPLATE = \
 r'''#!/usr/bin/env python
 import os, sys, glob
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.6.0b2'
+__version__   = '0.6.0b3'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -24,12 +24,12 @@ class PRS505(CLI, Device):

    VENDOR_ID    = [0x054c]   #: SONY Vendor Id
    PRODUCT_ID   = [0x031e]   #: Product Id for the PRS-505
-    BCD          = [0x229]  #: Needed to disambiguate 505 and 700 on linux
+    BCD          = [0x229, 0x1000]  #: Needed to disambiguate 505 and 700 on linux

    VENDOR_NAME  = 'SONY'
-    WINDOWS_MAIN_MEM = 'PRS-505'
-    WINDOWS_CARD_A_MEM = 'PRS-505/UC:MS'
-    WINDOWS_CARD_B_MEM = 'PRS-505/UC:SD'
+    WINDOWS_MAIN_MEM   = 'PRS-505'
+    WINDOWS_CARD_A_MEM = ['PRS-505/UC:MS', 'PRS-505/CE:MS']
+    WINDOWS_CARD_B_MEM = ['PRS-505/UC:SD', 'PRS-505/CE:SD']

    OSX_MAIN_MEM = 'Sony PRS-505/UC Media'
    OSX_CARD_A_MEM = 'Sony PRS-505/UC:MS Media'
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -179,13 +179,19 @@ class Device(DeviceConfig, DevicePlugin):

        return (msz, casz, cbsz)

-    def windows_match_device(self, pnp_id, device_id):
-        pnp_id = pnp_id.upper()
+    def windows_match_device(self, drive, attr):
+        # Decide whether the WMI disk drive object `drive` is the storage
+        # area described by the class attribute named `attr` (for example
+        # 'WINDOWS_MAIN_MEM').  The attribute may hold a single product
+        # string, a list of alternative product strings, or None.
+        # Matching is done case-insensitively against drive.PNPDeviceID.
+        # Returns True on the first product string found, otherwise False.
+        pnp_id = str(drive.PNPDeviceID).upper()
+        device_id = getattr(self, attr)
+        # No identifier configured, or the drive belongs to another vendor.
+        if device_id is None or \
+                'VEN_' + str(self.VENDOR_NAME).upper() not in pnp_id:
+            return False
+        # Normalise a lone string to a one-element list.
+        if isinstance(device_id, basestring):
+            device_id = [device_id]

-        if device_id and pnp_id is not None:
-            device_id = device_id.upper()
+        for x in device_id:
+            x = x.upper()

-            if 'VEN_' + self.VENDOR_NAME in pnp_id and 'PROD_' + device_id in pnp_id:
+            if 'PROD_' + x in pnp_id:
                return True

        return False
@ -211,18 +217,32 @@ class Device(DeviceConfig, DevicePlugin):
        return drives

    def open_windows(self):
+
+        def matches_q(drive, attr):
+            q = getattr(self, attr)
+            if q is None: return False
+            if isinstance(q, basestring):
+                q = [q]
+            pnp = str(drive.PNPDeviceID)
+            for x in q:
+                if x in pnp:
+                    return True
+            return False
+
+
        time.sleep(6)
        drives = {}
        wmi = __import__('wmi', globals(), locals(), [], -1)
        c = wmi.WMI(find_classes=False)
        for drive in c.Win32_DiskDrive():
-            if self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_A_MEM):
+            if self.windows_match_device(drive, 'WINDOWS_CARD_A_MEM'):
                drives['carda'] = self.windows_get_drive_prefix(drive)
-            elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_B_MEM):
+            elif self.windows_match_device(drive, 'WINDOWS_CARD_B_MEM'):
                drives['cardb'] = self.windows_get_drive_prefix(drive)
-            elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_MAIN_MEM):
+            elif self.windows_match_device(drive, 'WINDOWS_MAIN_MEM'):
                drives['main'] = self.windows_get_drive_prefix(drive)
-            if 'main' in drives.keys() and 'carda' in drives.keys() and 'cardb' in drives.keys():
+            if 'main' in drives.keys() and 'carda' in drives.keys() and \
+                    'cardb' in drives.keys():
                break

        if 'main' not in drives:
--- a/src/calibre/gui2/images/news/elperiodico_catalan.png
+++ b/src/calibre/gui2/images/news/elperiodico_catalan.png
--- a/src/calibre/gui2/images/news/elperiodico_spanish.png
+++ b/src/calibre/gui2/images/news/elperiodico_spanish.png
--- a/src/calibre/gui2/images/news/expansion_spanish.png
+++ b/src/calibre/gui2/images/news/expansion_spanish.png
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -45,7 +45,9 @@ recipe_modules = ['recipe_' + r for r in (
           'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
           'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
           'climate_progress', 'carta', 'slashdot', 'publico',
-           'the_budget_fashionista'
+           'the_budget_fashionista', 'elperiodico_catalan',
+           'elperiodico_spanish', 'expansion_spanish', 'lavanguardia',
+           'marca',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_elperiodico_catalan.py
+++ b/src/calibre/web/feeds/recipes/recipe_elperiodico_catalan.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+elperiodico.cat
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class ElPeriodico_cat(BasicNewsRecipe):
+    # Recipe for El Periodico de Catalunya as published on elperiodico.cat
+    # (language declared as Catalan).
+    title = 'El Periodico de Catalunya'
+    __author__ = 'Darko Miletic'
+    description = 'Noticias desde Catalunya'
+    publisher = 'elperiodico.cat'
+    category = 'news, politics, Spain, Catalunya'
+    language = _('Catalan')
+    encoding = 'cp1252'
+
+    # Download settings.
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # html2lrf / html2epub options built from the metadata above.
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+    ]
+    html2epub_options = ''.join([
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"',
+    ])
+
+    feeds = [
+        (u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46'),
+    ]
+
+    # Keep only the div with id 'noticia'; within it drop object/link/script
+    # tags, the 'herramientasDeNoticia' list and the 'inferiores' div.
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'noticia'}}]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'ul', 'attrs': {'class': 'herramientasDeNoticia'}},
+        {'name': 'div', 'attrs': {'id': 'inferiores'}},
+    ]
+
+    def print_version(self, url):
+        # Rewrite an article URL so that it points at print.asp instead of
+        # default.asp.
+        printable = url.replace('/default.asp?', '/print.asp?')
+        return printable
+
+    def preprocess_html(self, soup):
+        # Put a UTF-8 Content-Type <meta> first in <head>, then strip every
+        # inline style attribute from the document.
+        meta_attrs = [("http-equiv", "Content-Type"),
+                      ("content", "text/html; charset=utf-8")]
+        soup.head.insert(0, Tag(soup, 'meta', meta_attrs))
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_elperiodico_spanish.py
+++ b/src/calibre/web/feeds/recipes/recipe_elperiodico_spanish.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+elperiodico.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class ElPeriodico_esp(BasicNewsRecipe):
+    # Recipe for El Periodico de Catalunya as published on elperiodico.com
+    # (language declared as Spanish).
+    title = 'El Periodico de Catalunya'
+    __author__ = 'Darko Miletic'
+    description = 'Noticias desde Catalunya'
+    publisher = 'elperiodico.com'
+    category = 'news, politics, Spain, Catalunya'
+    language = _('Spanish')
+    encoding = 'cp1252'
+
+    # Download settings.
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # html2lrf / html2epub options built from the metadata above.
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+    ]
+    html2epub_options = ''.join([
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"',
+    ])
+
+    feeds = [
+        (u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46'),
+    ]
+
+    # Keep only the div with id 'noticia'; within it drop object/link/script
+    # tags, the 'herramientasDeNoticia' list and the 'inferiores' div.
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'noticia'}}]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'ul', 'attrs': {'class': 'herramientasDeNoticia'}},
+        {'name': 'div', 'attrs': {'id': 'inferiores'}},
+    ]
+
+    def print_version(self, url):
+        # Rewrite an article URL so that it points at print.asp instead of
+        # default.asp.
+        printable = url.replace('/default.asp?', '/print.asp?')
+        return printable
+
+    def preprocess_html(self, soup):
+        # Put a UTF-8 Content-Type <meta> first in <head>, then strip every
+        # inline style attribute from the document.
+        meta_attrs = [("http-equiv", "Content-Type"),
+                      ("content", "text/html; charset=utf-8")]
+        soup.head.insert(0, Tag(soup, 'meta', meta_attrs))
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_expansion_spanish.py
+++ b/src/calibre/web/feeds/recipes/recipe_expansion_spanish.py
@ -0,0 +1,58 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.expansion.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class Expansion(BasicNewsRecipe):
+    # Recipe for Diario Expansion (www.expansion.com).
+    title = 'Diario Expansion'
+    __author__ = 'Darko Miletic'
+    description = 'Lider de informacion de mercados, economica y politica'
+    publisher = 'expansion.com'
+    category = 'news, politics, Spain'
+    language = _('Spanish')
+    encoding = 'iso-8859-15'
+    direction = 'ltr'
+
+    # Download settings.
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # html2lrf / html2epub options built from the metadata above.
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+    ]
+    html2epub_options = ''.join([
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"',
+    ])
+
+    # NOTE(review): both entries use the same URL (data2=178), so the two
+    # sections will presumably carry identical articles -- confirm the
+    # intended id for 'Temas del dia'.
+    feeds = [
+        (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178'),
+        (u'Temas del dia', u'http://rss.expansion.com/rss/descarga.htm?data2=178'),
+    ]
+
+    # Keep only the div with id 'principal'; drop object/link/script tags
+    # plus the 'utilidades' and 'tit_relacionadas' divs.
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'principal'}}]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'div', 'attrs': {'class': ['utilidades', 'tit_relacionadas']}},
+    ]
+    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'tit_relacionadas'}}]
+
+    def preprocess_html(self, soup):
+        # Set the text direction on <html>, put a UTF-8 Content-Type <meta>
+        # first in <head>, then strip every inline style attribute.
+        soup.html['dir'] = self.direction
+        meta_attrs = [("http-equiv", "Content-Type"),
+                      ("content", "text/html; charset=utf-8")]
+        soup.head.insert(0, Tag(soup, 'meta', meta_attrs))
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_harpers.py
+++ b/src/calibre/web/feeds/recipes/recipe_harpers.py
@ -6,6 +6,7 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 harpers.org
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag

 class Harpers(BasicNewsRecipe):
    title                 = u"Harper's Magazine"
@ -18,23 +19,30 @@ class Harpers(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    remove_javascript     = True

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
-    
-    
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+
+
    keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    remove_tags = [
-                     dict(name='table', attrs={'class':'rcnt'})
-                    ,dict(name='table', attrs={'class':'rcnt topline'})
+                     dict(name='table', attrs={'class':['rcnt','rcnt topline']})
                    ,dict(name=['link','object','embed'])
                  ]

    feeds       = [(u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')]

+    def preprocess_html(self, soup):
+        # Insert a UTF-8 Content-Type <meta> at index 1 of <head>, then
+        # remove all inline 'style' attributes followed by all 'xmlns'
+        # attributes.
+        meta_attrs = [("http-equiv", "Content-Type"),
+                      ("content", "text/html; charset=utf-8")]
+        soup.head.insert(1, Tag(soup, 'meta', meta_attrs))
+        for attr_name in ('style', 'xmlns'):
+            for tag in soup.findAll(**{attr_name: True}):
+                del tag[attr_name]
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_harpers_full.py
+++ b/src/calibre/web/feeds/recipes/recipe_harpers_full.py
@ -9,40 +9,38 @@ images and pdf's are ignored
 '''

 from calibre import strftime
-
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag

 class Harpers_full(BasicNewsRecipe):
    title                 = u"Harper's Magazine - articles from printed edition"
    __author__            = u'Darko Miletic'
    description           = u"Harper's Magazine: Founded June 1850."
    publisher             = "Harpers's"
-    category              = 'news, politics, USA'    
+    category              = 'news, politics, USA'
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    simultaneous_downloads = 1
    delay                  = 1
    language               = _('English')
    needs_subscription = True
    INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
    LOGIN = 'http://www.harpers.org'
    cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
-    remove_javascript     = True
-    
+
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'

    keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    remove_tags = [
-                     dict(name='table', attrs={'class':'rcnt'})
-                    ,dict(name='table', attrs={'class':'rcnt topline'})
+                     dict(name='table', attrs={'class':['rcnt','rcnt topline']})
+                    ,dict(name='link')
                  ]

    def get_browser(self):
@ -54,13 +52,13 @@ class Harpers_full(BasicNewsRecipe):
            br['password'] = self.password
            br.submit()
        return br
-        
+
    def parse_index(self):
        articles = []
        print 'Processing ' + self.INDEX
        soup = self.index_to_soup(self.INDEX)
        for item in soup.findAll('div', attrs={'class':'title'}):
-            text_link = item.parent.find('img',attrs={'alt':'Text'})            
+            text_link = item.parent.find('img',attrs={'alt':'Text'})
            if text_link:
                url   = self.LOGIN + item.a['href']
                title = item.a.contents[0]
@ -72,4 +70,12 @@ class Harpers_full(BasicNewsRecipe):
                                 ,'description':''
                                })
        return [(soup.head.title.string, articles)]
-        
+
+    def preprocess_html(self, soup):
+        # Insert a UTF-8 Content-Type <meta> at index 1 of <head>, then
+        # remove all inline 'style' attributes followed by all 'xmlns'
+        # attributes.
+        meta_attrs = [("http-equiv", "Content-Type"),
+                      ("content", "text/html; charset=utf-8")]
+        soup.head.insert(1, Tag(soup, 'meta', meta_attrs))
+        for attr_name in ('style', 'xmlns'):
+            for tag in soup.findAll(**{attr_name: True}):
+                del tag[attr_name]
+        return soup
--- a/src/calibre/web/feeds/recipes/recipe_lavanguardia.py
+++ b/src/calibre/web/feeds/recipes/recipe_lavanguardia.py
@ -0,0 +1,69 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.lavanguardia.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class LaVanguardia(BasicNewsRecipe):
+    # Recipe for La Vanguardia Digital (www.lavanguardia.es), built from the
+    # paper's per-section FeedBurner feeds.
+    title                 = 'La Vanguardia Digital'
+    __author__            = 'Darko Miletic'
+    description           = u'Noticias desde España'
+    publisher             = 'La Vanguardia'
+    category              = 'news, politics, Spain'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    delay                 = 1
+    encoding              = 'cp1252'
+    language              = _('Spanish')
+    direction             = 'ltr'
+
+    # html2lrf / html2epub options built from the metadata above.
+    html2lrf_options = [
+                          '--comment'  , description
+                        , '--category' , category
+                        , '--publisher', publisher
+                        ]
+
+    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+
+    # (section title, feed URL) pairs.  The last title was misspelled
+    # 'Sucessos'; the Spanish word, as in the feed URL itself, is 'Sucesos'.
+    feeds              = [
+                            (u'Ciudadanos'           , u'http://feeds.feedburner.com/lavanguardia/ciudadanos'   )
+                           ,(u'Cultura'              , u'http://feeds.feedburner.com/lavanguardia/cultura'      )
+                           ,(u'Deportes'             , u'http://feeds.feedburner.com/lavanguardia/deportes'     )
+                           ,(u'Economia'             , u'http://feeds.feedburner.com/lavanguardia/economia'     )
+                           ,(u'El lector opina'      , u'http://feeds.feedburner.com/lavanguardia/lectoropina'  )
+                           ,(u'Gente y TV'           , u'http://feeds.feedburner.com/lavanguardia/gente'        )
+                           ,(u'Internacional'        , u'http://feeds.feedburner.com/lavanguardia/internacional')
+                           ,(u'Internet y tecnologia', u'http://feeds.feedburner.com/lavanguardia/internet'     )
+                           ,(u'Motor'                , u'http://feeds.feedburner.com/lavanguardia/motor'        )
+                           ,(u'Politica'             , u'http://feeds.feedburner.com/lavanguardia/politica'     )
+                           ,(u'Sucesos'              , u'http://feeds.feedburner.com/lavanguardia/sucesos'      )
+                         ]
+
+
+    # Keep only the 'element1_3' div; drop object/link/script tags plus the
+    # 'colC' and 'peu' divs.
+    keep_only_tags = [
+                       dict(name='div', attrs={'class':'element1_3'})
+                     ]
+
+    remove_tags        = [
+                             dict(name=['object','link','script'])
+                            ,dict(name='div', attrs={'class':['colC','peu']})
+                         ]
+
+    remove_tags_after = [dict(name='div', attrs={'class':'text'})]
+
+    def preprocess_html(self, soup):
+        # Set the text direction on <html>, put a UTF-8 Content-Type <meta>
+        # first in <head>, then strip every inline style attribute.
+        soup.html['dir' ] = self.direction
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mcharset)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_marca.py
+++ b/src/calibre/web/feeds/recipes/recipe_marca.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.marca.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class Marca(BasicNewsRecipe):
+    # Recipe for Marca (www.marca.com).
+    title = 'Marca'
+    __author__ = 'Darko Miletic'
+    description = 'Noticias deportivas'
+    publisher = 'marca.com'
+    category = 'news, sports, Spain'
+    language = _('Spanish')
+    encoding = 'iso-8859-15'
+    direction = 'ltr'
+
+    # Download settings.
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # html2lrf / html2epub options built from the metadata above.
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+    ]
+    html2epub_options = ''.join([
+        'publisher="', publisher,
+        '"\ncomments="', description,
+        '"\ntags="', category, '"',
+    ])
+
+    feeds = [
+        (u'Portada', u'http://rss.marca.com/rss/descarga.htm?data2=425'),
+    ]
+
+    # Keep the 'cab_articulo' and 'col_izq' divs; drop object/link/script
+    # tags and the listed helper divs.
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'class': ['cab_articulo', 'col_izq']}},
+    ]
+    remove_tags = [
+        {'name': ['object', 'link', 'script']},
+        {'name': 'div', 'attrs': {'class': ['colC', 'peu']}},
+        {'name': 'div', 'attrs': {'class': ['utilidades estirar', 'bloque_int_corr estirar']}},
+    ]
+    remove_tags_after = [
+        {'name': 'div', 'attrs': {'class': 'bloque_int_corr estirar'}},
+    ]
+
+    def preprocess_html(self, soup):
+        # Set the text direction on <html>, put a UTF-8 Content-Type <meta>
+        # first in <head>, then strip every inline style attribute.
+        soup.html['dir'] = self.direction
+        meta_attrs = [("http-equiv", "Content-Type"),
+                      ("content", "text/html; charset=utf-8")]
+        soup.head.insert(0, Tag(soup, 'meta', meta_attrs))
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/src/calibre/www/settings.py
+++ b/src/calibre/www/settings.py
@ -45,7 +45,7 @@ LANGUAGE_CODE = 'en-us'

 # If you set this to False, Django will make some optimizations so as not
 # to load the internationalization machinery.
-USE_I18N = False
+USE_I18N = True

 # List of callables that know how to import templates from various sources.
 TEMPLATE_LOADERS = (