Merge from trunk

This commit is contained in:
Charles Haley 2011-10-26 09:07:58 +02:00
commit 728be0ff52
7 changed files with 145 additions and 40 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 712 B

View File

@ -1,9 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.utils.magick import Image, PixelWand, create_canvas
''' Version 1.2, updated cover image to match the changed website.
added info date on title
version 1.4 Updated tags, delay and added autoclean 22-09-2011
version 1.5 Changes due to changes in site
version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
Added some processing on pictures
Removed links in html
Removed extra whitespace characters
changed handling of self closing span
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe): class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro Nieuws NL' title = u'Metro Nieuws NL'
# Version 1.2, updated cover image to match the changed website.
# added info date on title
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
__author__ = u'DrMerry' __author__ = u'DrMerry'
@ -11,8 +24,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
language = u'nl' language = u'nl'
simultaneous_downloads = 5 simultaneous_downloads = 5
#delay = 1 #delay = 1
auto_cleanup = True #auto_cleanup = True
auto_cleanup_keep = '//div[@class="article-image-caption-2column"]|//div[@id="date"]' #auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
timefmt = ' [%A, %d %b %Y]' timefmt = ' [%A, %d %b %Y]'
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
@ -20,22 +33,74 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg' cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
publication_type = 'newspaper' publication_type = 'newspaper'
remove_tags_before = dict(name='div', attrs={'id':'date'}) remove_tags_before = dict(name='div', attrs={'id':'date'})
remove_tags_after = dict(name='div', attrs={'id':'column-1-3'}) remove_tags_after = dict(name='div', attrs={'class':'article-body'})
encoding = 'utf-8' encoding = 'utf-8'
extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph, p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}' remove_attributes = ['style', 'font', 'width', 'height']
use_embedded_content = False
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
.article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
.article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\
div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\
p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}),
dict(name='div', attrs={'id':['date']}),
dict(name='h1', attrs={'class':['title']}),
dict(name='h2', attrs={'class':['subtitle']})]
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap', remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links', 'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}), 'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}), dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
dict(name='iframe')] dict(name='iframe')]
# (pattern, replacement) pairs; all patterns are compiled with DOTALL and
# IGNORECASE.
#
# 1. Delete outright: paragraphs that contain only &nbsp;/whitespace, the
#    "Tweet" link, every remaining <a ...> opening tag and </a> closing tag
#    (the link text itself is kept), and HTML comments.
preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->)', re.DOTALL|re.IGNORECASE),lambda match: ''),
# 2. Collapse any run of &nbsp; entities and/or whitespace pairs (plus any
#    trailing whitespace) into one space. A lone whitespace character is not
#    matched by this pattern.
(re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
# 3. Rewrite a self-closing <span ... /> that directly follows a word:
#    group(1) is the whitespace or '>' before the word, group(2) the word,
#    group(3) the span tag without its ' />' ending. The replacement opens the
#    span before the word and closes it after, so the word ends up inside it.
(re.compile(r'([\s>])([^\s>]+)(<span[^>]+) />', re.DOTALL|re.IGNORECASE),
lambda match: match.group(1) + match.group(3) + '>' + match.group(2) + '</span>'),
]
def postprocess_html(self, soup, first):
    '''
    Trim every image referenced by the article and return the soup.

    For each <img> tag that has a ``src`` attribute, the value of ``src``
    is opened as an image, trimmed with ``trim(0)`` and saved back to the
    same location, overwriting the original image.

    :param soup: parsed article; only its ``findAll`` method is used.
    :param first: not used by this implementation.
    :return: the same ``soup`` object that was passed in.
    '''
    for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
        iurl = tag['src']
        img = Image()
        img.open(iurl)
        # NOTE(review): trim(0) presumably strips the uniform border with
        # zero colour tolerance -- confirm against calibre.utils.magick.
        img.trim(0)
        img.save(iurl)
    return soup
feeds = [ feeds = [
(u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'), (u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
(u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'), (u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
(u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'), (u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
(u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'), (u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
(u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'), (u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
(u'Buitenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-4'),
(u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'), (u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
(u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'), (u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
(u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'), (u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),

View File

@ -12,21 +12,18 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DailyTelegraph(BasicNewsRecipe): class DailyTelegraph(BasicNewsRecipe):
title = u'The Australian' title = u'The Australian'
__author__ = u'Matthew Briggs and Sujata Raman' __author__ = u'Matthew Briggs and Sujata Raman'
description = u'National broadsheet newspaper from down under - colloquially known as The Oz' description = (u'National broadsheet newspaper from down under - colloquially known as The Oz'
'. You will need to have a subscription to '
'http://www.theaustralian.com.au to get full articles.')
language = 'en_AU' language = 'en_AU'
oldest_article = 2 oldest_article = 2
needs_subscription = 'optional'
max_articles_per_feed = 30 max_articles_per_feed = 30
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf8'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Australia'
, '--publisher' , title
]
keep_only_tags = [dict(name='div', attrs={'id': 'story'})] keep_only_tags = [dict(name='div', attrs={'id': 'story'})]
#remove_tags = [dict(name=['object','link'])] #remove_tags = [dict(name=['object','link'])]
@ -67,6 +64,19 @@ class DailyTelegraph(BasicNewsRecipe):
(u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'), (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
(u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')] (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
def get_browser(self):
    '''
    Return the download browser, logged in when credentials are available.

    Without both a username and a password the plain browser from
    BasicNewsRecipe is returned. Otherwise the site front page is opened,
    the first form on it is filled in with the credentials and submitted.

    Raises ValueError when the page returned by the submission does not
    contain the text ">log out" (compared case-insensitively).
    '''
    browser = BasicNewsRecipe.get_browser(self)
    if not (self.username and self.password):
        return browser
    browser.open('http://www.theaustralian.com.au')
    # nr=0 selects the first form on the page (presumably the login form).
    browser.select_form(nr=0)
    browser['username'] = self.username
    browser['password'] = self.password
    response = browser.submit().read()
    if '>log out' in response.lower():
        return browser
    raise ValueError('Failed to log in to www.theaustralian.com.au'
            ' are your username and password correct?')
def get_article_url(self, article): def get_article_url(self, article):
return article.id return article.id
@ -76,14 +86,4 @@ class DailyTelegraph(BasicNewsRecipe):
#return br.geturl() #return br.geturl()
def get_cover_url(self):
    '''
    Return the URL of the cover image, or None when it cannot be found.

    The front page is fetched with ``self.index_to_soup`` and searched for
    the <img> tag whose ``alt`` text is "AUS HP promo digital2"; the
    ``src`` of that tag is the cover URL.
    '''
    soup = self.index_to_soup('http://www.theaustralian.news.com.au/')
    img = soup.find('img', alt="AUS HP promo digital2")
    if img is not None:
        return img['src']
    # No matching image on the page: report "no cover" explicitly.
    return None

View File

@ -855,6 +855,7 @@ class DeviceMixin(object): # {{{
Force the library view to refresh, taking into consideration new Force the library view to refresh, taking into consideration new
device books information device books information
''' '''
with self.library_view.preserve_state():
self.book_on_device(None, reset=True) self.book_on_device(None, reset=True)
if reset_only: if reset_only:
return return
@ -1319,7 +1320,7 @@ class DeviceMixin(object): # {{{
# If it does not, then do it here. # If it does not, then do it here.
if not self.set_books_in_library(self.booklists(), reset=True, add_as_step_to_job=job): if not self.set_books_in_library(self.booklists(), reset=True, add_as_step_to_job=job):
self.upload_booklists(job) self.upload_booklists(job)
with self.library_view.preserve_selected_books: with self.library_view.preserve_state():
self.book_on_device(None, reset=True) self.book_on_device(None, reset=True)
self.refresh_ondevice() self.refresh_ondevice()

View File

@ -23,24 +23,43 @@ from calibre.gui2.library import DEFAULT_SORT
from calibre.constants import filesystem_encoding from calibre.constants import filesystem_encoding
from calibre import force_unicode from calibre import force_unicode
class PreserveSelection(object): # {{{ class PreserveViewState(object): # {{{
''' '''
Save the set of selected books at enter time. If at exit time there are no Save the set of selected books at enter time. If at exit time there are no
selected books, restore the previous selection. selected books, restore the previous selection, the previous current index
and don't affect the scroll position.
''' '''
def __init__(self, view): def __init__(self, view, preserve_hpos=True, preserve_vpos=True):
self.view = view self.view = view
self.selected_ids = [] self.selected_ids = set()
self.current_id = None
self.preserve_hpos = preserve_hpos
self.preserve_vpos = preserve_vpos
self.vscroll = self.hscroll = 0
def __enter__(self): def __enter__(self):
try:
self.selected_ids = self.view.get_selected_ids() self.selected_ids = self.view.get_selected_ids()
self.current_id = self.view.current_id
self.vscroll = self.view.verticalScrollBar().value()
self.hscroll = self.view.horizontalScrollBar().value()
except:
import traceback
traceback.print_exc()
def __exit__(self, *args): def __exit__(self, *args):
current = self.view.get_selected_ids() current = self.view.get_selected_ids()
if not current: if not current and self.selected_ids:
self.view.select_rows(self.selected_ids, using_ids=True) if self.current_id is not None:
self.view.current_id = self.current_id
self.view.select_rows(self.selected_ids, using_ids=True,
scroll=False, change_current=self.current_id is None)
if self.preserve_vpos:
self.view.verticalScrollBar().setValue(self.vscroll)
if self.preserve_hpos:
self.view.horizontalScrollBar().setValue(self.hscroll)
# }}} # }}}
class BooksView(QTableView): # {{{ class BooksView(QTableView): # {{{
@ -104,7 +123,7 @@ class BooksView(QTableView): # {{{
self.setSelectionBehavior(QAbstractItemView.SelectRows) self.setSelectionBehavior(QAbstractItemView.SelectRows)
self.setSortingEnabled(True) self.setSortingEnabled(True)
self.selectionModel().currentRowChanged.connect(self._model.current_changed) self.selectionModel().currentRowChanged.connect(self._model.current_changed)
self.preserve_selected_books = PreserveSelection(self) self.preserve_state = partial(PreserveViewState, self)
# {{{ Column Header setup # {{{ Column Header setup
self.can_add_columns = True self.can_add_columns = True
@ -788,6 +807,23 @@ class BooksView(QTableView): # {{{
ans.append(i) ans.append(i)
return ans return ans
@dynamic_property
def current_id(self):
    '''
    Property giving the id of the book at the view's current index.

    Reading yields None when the id cannot be resolved. Assigning an id
    makes the first row with that id the current row without selecting
    it; assigning None, or an id that no row has, changes nothing.
    '''
    def fget(self):
        # Any failure while resolving the id is reported as "no current
        # book" rather than raised.
        try:
            return self.model().id(self.currentIndex())
        except:
            return None

    def fset(self, val):
        if val is None:
            return
        model = self.model()
        row_count = model.rowCount(QModelIndex())
        # First row whose id matches, or None when there is no such row.
        match = next((row for row in xrange(row_count)
                      if model.id(row) == val), None)
        if match is not None:
            self.set_current_row(match, select=False)

    return property(fget=fget, fset=fset)
def close(self): def close(self):
self._model.close() self._model.close()

View File

@ -29,10 +29,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
(_('Only on send'), 'on_send'), (_('Only on send'), 'on_send'),
(_('Automatic management'), 'on_connect')] (_('Automatic management'), 'on_connect')]
r('manage_device_metadata', prefs, choices=choices) r('manage_device_metadata', prefs, choices=choices)
if gui.device_manager.is_device_connected: if gui.device_manager.is_device_connected:
self.opt_manage_device_metadata.setEnabled(False) self.opt_manage_device_metadata.setEnabled(False)
self.opt_manage_device_metadata.setToolTip( self.opt_manage_device_metadata.setToolTip(
_('Cannot change metadata management while a device is connected')) _('Cannot change metadata management while a device is connected'))
self.mm_label.setText('Metadata management (disabled while '
'device connected)')
self.send_template.changed_signal.connect(self.changed_signal.emit) self.send_template.changed_signal.connect(self.changed_signal.emit)

View File

@ -15,7 +15,7 @@
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="0" column="0"> <item row="0" column="0">
<widget class="QLabel" name="label_4"> <widget class="QLabel" name="mm_label">
<property name="sizePolicy"> <property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Preferred"> <sizepolicy hsizetype="Maximum" vsizetype="Preferred">
<horstretch>0</horstretch> <horstretch>0</horstretch>