Merge from trunk

2025-08-30 23:00:21 -04:00 · 2011-10-26 09:07:58 +02:00 · 2011-10-26 09:07:58 +02:00 · 728be0ff52
commit 728be0ff52
parent dc7487d00a 7bf888bd61
7 changed files with 145 additions and 40 deletions
--- a/recipes/icons/metro_news_nl.png
+++ b/recipes/icons/metro_news_nl.png
--- a/recipes/metro_news_nl.recipe
+++ b/recipes/metro_news_nl.recipe
@ -1,9 +1,22 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.utils.magick import Image, PixelWand, create_canvas
+
+
+''' Version 1.2, updated cover image to match the changed website.
+ added info date on title
+ version 1.4 Updated tags, delay and added autoclean 22-09-2011
+ version 1.5 Changes due to changes in site
+ version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes 
+    Added some processing on pictures
+    Removed links in html
+    Removed extra whitespace characters
+    changed handling of self closing span
+ '''

 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro Nieuws NL'
-# Version 1.2, updated cover image to match the changed website.
-# added info date on title
    oldest_article = 2
    max_articles_per_feed = 100
    __author__     = u'DrMerry'
@ -11,8 +24,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    language       = u'nl'
    simultaneous_downloads = 5
    #delay          = 1
-    auto_cleanup = True
-    auto_cleanup_keep = '//div[@class="article-image-caption-2column"]|//div[@id="date"]'
+    #auto_cleanup = True
+    #auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
    timefmt        = ' [%A, %d %b %Y]'
    no_stylesheets = True
    remove_javascript = True
@ -20,22 +33,74 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    cover_url      = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
    publication_type = 'newspaper'
    remove_tags_before = dict(name='div', attrs={'id':'date'})
-    remove_tags_after = dict(name='div', attrs={'id':'column-1-3'})
+    remove_tags_after = dict(name='div', attrs={'class':'article-body'})
    encoding              = 'utf-8'
-    extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph,  p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}'
+    remove_attributes = ['style', 'font', 'width', 'height']
+    use_embedded_content = False
+    extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
+        #date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
+        .article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
+        h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
+        .article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\
+        div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\
+        p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \
+        p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
+        div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
+        div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
+        img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
+        
+    keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}),
+        dict(name='div', attrs={'id':['date']}),
+        dict(name='h1', attrs={'class':['title']}),
+        dict(name='h2', attrs={'class':['subtitle']})]
    
    remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
        'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
        'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
-        dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}),
+        dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
        dict(name='iframe')]

+    preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->)', re.DOTALL|re.IGNORECASE),lambda match: ''),
+        (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
+        (re.compile(r'([\s>])([^\s>]+)(<span[^>]+) />', re.DOTALL|re.IGNORECASE),
+            lambda match: match.group(1) + match.group(3) + '>' + match.group(2) + '</span>'),
+        ]
+
+    def postprocess_html(self, soup, first):
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+        #width, height = img.size
+        #print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height 
+            pw = PixelWand()
+            img.trim(0)
+            img.save(iurl)
+            '''
+            #width, height = img.size
+            #print '***TRIMMED img width is: ', width, 'height is: ', height 
+            left=0
+            top=0
+            border_color='#ffffff'
+            width, height = img.size
+            #print '***retrieved img width is: ', width, 'height is: ', height 
+            height_correction = 1.17
+            canvas = create_canvas(width, height*height_correction,border_color)
+            canvas.compose(img, left, top)
+            #img = canvas
+            canvas.save(iurl)
+            #width, height = canvas.size
+            #print '***NEW img width is: ', width, 'height is: ', height 
+            '''
+        return soup
+        
    feeds = [
        (u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
        (u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
        (u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
        (u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
        (u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
+        (u'Buitenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-4'),
        (u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
        (u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
        (u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),
--- a/recipes/the_oz.recipe
+++ b/recipes/the_oz.recipe
@ -12,21 +12,18 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class DailyTelegraph(BasicNewsRecipe):
    title          = u'The Australian'
    __author__     = u'Matthew Briggs and Sujata Raman'
-    description    = u'National broadsheet newspaper from down under - colloquially known as The Oz'
+    description    = (u'National broadsheet newspaper from down under - colloquially known as The Oz'
+            '. You will need to have a subscription to '
+            'http://www.theaustralian.com.au to get full articles.')
    language = 'en_AU'

    oldest_article = 2
+    needs_subscription = 'optional'
    max_articles_per_feed = 30
    remove_javascript      = True
    no_stylesheets         = True
    encoding               = 'utf8'

-    html2lrf_options = [
-                          '--comment'       , description
-                        , '--category'      , 'news, Australia'
-                        , '--publisher'     , title
-                        ]
-
    keep_only_tags = [dict(name='div', attrs={'id': 'story'})]

    #remove_tags = [dict(name=['object','link'])]
@ -67,6 +64,19 @@ class DailyTelegraph(BasicNewsRecipe):
                    (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
                    (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]

+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username and self.password:
+            br.open('http://www.theaustralian.com.au')
+            br.select_form(nr=0)
+            br['username'] = self.username
+            br['password'] = self.password
+            raw = br.submit().read()
+            if '>log out' not in raw.lower():
+                raise ValueError('Failed to log in to www.theaustralian.com.au'
+                        ' are your username and password correct?')
+        return br
+
    def get_article_url(self, article):
        return article.id

@ -76,14 +86,4 @@ class DailyTelegraph(BasicNewsRecipe):

        #return br.geturl()

-    def get_cover_url(self):

-        href =  'http://www.theaustralian.news.com.au/'
-
-        soup = self.index_to_soup(href)
-        img = soup.find('img',alt ="AUS HP promo digital2")
-        print img
-        if img :
-           cover_url = img['src']
-
-        return cover_url
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -855,6 +855,7 @@ class DeviceMixin(object): # {{{
        Force the library view to refresh, taking into consideration new
        device books information
        '''
+        with self.library_view.preserve_state():
            self.book_on_device(None, reset=True)
            if reset_only:
                return
@ -1319,7 +1320,7 @@ class DeviceMixin(object): # {{{
        # If it does not, then do it here.
        if not self.set_books_in_library(self.booklists(), reset=True, add_as_step_to_job=job):
            self.upload_booklists(job)
-        with self.library_view.preserve_selected_books:
+        with self.library_view.preserve_state():
            self.book_on_device(None, reset=True)
            self.refresh_ondevice()

--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -23,24 +23,43 @@ from calibre.gui2.library import DEFAULT_SORT
 from calibre.constants import filesystem_encoding
 from calibre import force_unicode

-class PreserveSelection(object): # {{{
+class PreserveViewState(object): # {{{

    '''
    Save the set of selected books at enter time. If at exit time there are no
-    selected books, restore the previous selection.
+    selected books, restore the previous selection, the previous current index
+    and do not affect the scroll position.
    '''

-    def __init__(self, view):
+    def __init__(self, view, preserve_hpos=True, preserve_vpos=True):
        self.view = view
-        self.selected_ids = []
+        self.selected_ids = set()
+        self.current_id = None
+        self.preserve_hpos = preserve_hpos
+        self.preserve_vpos = preserve_vpos
+        self.vscroll = self.hscroll = 0

    def __enter__(self):
+        try:
            self.selected_ids = self.view.get_selected_ids()
+            self.current_id = self.view.current_id
+            self.vscroll = self.view.verticalScrollBar().value()
+            self.hscroll = self.view.horizontalScrollBar().value()
+        except:
+            import traceback
+            traceback.print_exc()

    def __exit__(self, *args):
        current = self.view.get_selected_ids()
-        if not current:
-            self.view.select_rows(self.selected_ids, using_ids=True)
+        if not current and self.selected_ids:
+            if self.current_id is not None:
+                self.view.current_id = self.current_id
+            self.view.select_rows(self.selected_ids, using_ids=True,
+                    scroll=False, change_current=self.current_id is None)
+            if self.preserve_vpos:
+                self.view.verticalScrollBar().setValue(self.vscroll)
+            if self.preserve_hpos:
+                self.view.horizontalScrollBar().setValue(self.hscroll)
 # }}}

 class BooksView(QTableView): # {{{
@ -104,7 +123,7 @@ class BooksView(QTableView): # {{{
        self.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.setSortingEnabled(True)
        self.selectionModel().currentRowChanged.connect(self._model.current_changed)
-        self.preserve_selected_books = PreserveSelection(self)
+        self.preserve_state = partial(PreserveViewState, self)

        # {{{ Column Header setup
        self.can_add_columns = True
@ -788,6 +807,23 @@ class BooksView(QTableView): # {{{
                ans.append(i)
        return ans

+    @dynamic_property
+    def current_id(self):
+        def fget(self):
+            try:
+                return self.model().id(self.currentIndex())
+            except:
+                pass
+            return None
+        def fset(self, val):
+            if val is None: return
+            m = self.model()
+            for row in xrange(m.rowCount(QModelIndex())):
+                if m.id(row) == val:
+                    self.set_current_row(row, select=False)
+                    break
+        return property(fget=fget, fset=fset)
+
    def close(self):
        self._model.close()

--- a/src/calibre/gui2/preferences/sending.py
+++ b/src/calibre/gui2/preferences/sending.py
@ -29,10 +29,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                (_('Only on send'), 'on_send'),
                (_('Automatic management'), 'on_connect')]
        r('manage_device_metadata', prefs, choices=choices)
+
        if gui.device_manager.is_device_connected:
            self.opt_manage_device_metadata.setEnabled(False)
            self.opt_manage_device_metadata.setToolTip(
                _('Cannot change metadata management while a device is connected'))
+            self.mm_label.setText('Metadata management (disabled while '
+                    'device connected)')

        self.send_template.changed_signal.connect(self.changed_signal.emit)

--- a/src/calibre/gui2/preferences/sending.ui
+++ b/src/calibre/gui2/preferences/sending.ui
@ -15,7 +15,7 @@
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0">
-    <widget class="QLabel" name="label_4">
+    <widget class="QLabel" name="mm_label">
     <property name="sizePolicy">
      <sizepolicy hsizetype="Maximum" vsizetype="Preferred">
       <horstretch>0</horstretch>