Merge from trunk

This commit is contained in:
Charles Haley 2011-10-26 09:07:58 +02:00
commit 728be0ff52
7 changed files with 145 additions and 40 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 712 B

View File

@ -1,9 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.utils.magick import Image, PixelWand, create_canvas
''' Version 1.2, updated cover image to match the changed website.
added info date on title
version 1.4 Updated tags, delay and added autoclean 22-09-2011
version 1.5 Changes due to changes in site
version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
Added som processing on pictures
Removed links in html
Removed extre white characters
changed handling of self closing span
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro Nieuws NL'
# Version 1.2, updated cover image to match the changed website.
# added info date on title
oldest_article = 2
max_articles_per_feed = 100
__author__ = u'DrMerry'
@ -11,8 +24,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
language = u'nl'
simultaneous_downloads = 5
#delay = 1
auto_cleanup = True
auto_cleanup_keep = '//div[@class="article-image-caption-2column"]|//div[@id="date"]'
#auto_cleanup = True
#auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
timefmt = ' [%A, %d %b %Y]'
no_stylesheets = True
remove_javascript = True
@ -20,22 +33,74 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
publication_type = 'newspaper'
remove_tags_before = dict(name='div', attrs={'id':'date'})
remove_tags_after = dict(name='div', attrs={'id':'column-1-3'})
remove_tags_after = dict(name='div', attrs={'class':'article-body'})
encoding = 'utf-8'
extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph, p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}'
remove_attributes = ['style', 'font', 'width', 'height']
use_embedded_content = False
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
.article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
.article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\
div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\
p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}),
dict(name='div', attrs={'id':['date']}),
dict(name='h1', attrs={'class':['title']}),
dict(name='h2', attrs={'class':['subtitle']})]
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}),
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
dict(name='iframe')]
preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->)', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
(re.compile(r'([\s>])([^\s>]+)(<span[^>]+) />', re.DOTALL|re.IGNORECASE),
lambda match: match.group(1) + match.group(3) + '>' + match.group(2) + '</span>'),
]
def postprocess_html(self, soup, first):
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
#width, height = img.size
#print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
pw = PixelWand()
img.trim(0)
img.save(iurl)
'''
#width, height = img.size
#print '***TRIMMED img width is: ', width, 'height is: ', height
left=0
top=0
border_color='#ffffff'
width, height = img.size
#print '***retrieved img width is: ', width, 'height is: ', height
height_correction = 1.17
canvas = create_canvas(width, height*height_correction,border_color)
canvas.compose(img, left, top)
#img = canvas
canvas.save(iurl)
#width, height = canvas.size
#print '***NEW img width is: ', width, 'height is: ', height
'''
return soup
feeds = [
(u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
(u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
(u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
(u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
(u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
(u'Buitenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-4'),
(u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
(u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
(u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),

View File

@ -12,21 +12,18 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DailyTelegraph(BasicNewsRecipe):
title = u'The Australian'
__author__ = u'Matthew Briggs and Sujata Raman'
description = u'National broadsheet newspaper from down under - colloquially known as The Oz'
description = (u'National broadsheet newspaper from down under - colloquially known as The Oz'
'. You will need to have a subscription to '
'http://www.theaustralian.com.au to get full articles.')
language = 'en_AU'
oldest_article = 2
needs_subscription = 'optional'
max_articles_per_feed = 30
remove_javascript = True
no_stylesheets = True
encoding = 'utf8'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Australia'
, '--publisher' , title
]
keep_only_tags = [dict(name='div', attrs={'id': 'story'})]
#remove_tags = [dict(name=['object','link'])]
@ -67,6 +64,19 @@ class DailyTelegraph(BasicNewsRecipe):
(u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
(u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username and self.password:
br.open('http://www.theaustralian.com.au')
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
raw = br.submit().read()
if '>log out' not in raw.lower():
raise ValueError('Failed to log in to www.theaustralian.com.au'
' are your username and password correct?')
return br
def get_article_url(self, article):
return article.id
@ -76,14 +86,4 @@ class DailyTelegraph(BasicNewsRecipe):
#return br.geturl()
def get_cover_url(self):
href = 'http://www.theaustralian.news.com.au/'
soup = self.index_to_soup(href)
img = soup.find('img',alt ="AUS HP promo digital2")
print img
if img :
cover_url = img['src']
return cover_url

View File

@ -855,6 +855,7 @@ class DeviceMixin(object): # {{{
Force the library view to refresh, taking into consideration new
device books information
'''
with self.library_view.preserve_state():
self.book_on_device(None, reset=True)
if reset_only:
return
@ -1319,7 +1320,7 @@ class DeviceMixin(object): # {{{
# If it does not, then do it here.
if not self.set_books_in_library(self.booklists(), reset=True, add_as_step_to_job=job):
self.upload_booklists(job)
with self.library_view.preserve_selected_books:
with self.library_view.preserve_state():
self.book_on_device(None, reset=True)
self.refresh_ondevice()

View File

@ -23,24 +23,43 @@ from calibre.gui2.library import DEFAULT_SORT
from calibre.constants import filesystem_encoding
from calibre import force_unicode
class PreserveSelection(object): # {{{
class PreserveViewState(object): # {{{
'''
Save the set of selected books at enter time. If at exit time there are no
selected books, restore the previous selection.
selected books, restore the previous selection, the previous current index
and dont affect the scroll position.
'''
def __init__(self, view):
def __init__(self, view, preserve_hpos=True, preserve_vpos=True):
self.view = view
self.selected_ids = []
self.selected_ids = set()
self.current_id = None
self.preserve_hpos = preserve_hpos
self.preserve_vpos = preserve_vpos
self.vscroll = self.hscroll = 0
def __enter__(self):
try:
self.selected_ids = self.view.get_selected_ids()
self.current_id = self.view.current_id
self.vscroll = self.view.verticalScrollBar().value()
self.hscroll = self.view.horizontalScrollBar().value()
except:
import traceback
traceback.print_exc()
def __exit__(self, *args):
current = self.view.get_selected_ids()
if not current:
self.view.select_rows(self.selected_ids, using_ids=True)
if not current and self.selected_ids:
if self.current_id is not None:
self.view.current_id = self.current_id
self.view.select_rows(self.selected_ids, using_ids=True,
scroll=False, change_current=self.current_id is None)
if self.preserve_vpos:
self.view.verticalScrollBar().setValue(self.vscroll)
if self.preserve_hpos:
self.view.horizontalScrollBar().setValue(self.hscroll)
# }}}
class BooksView(QTableView): # {{{
@ -104,7 +123,7 @@ class BooksView(QTableView): # {{{
self.setSelectionBehavior(QAbstractItemView.SelectRows)
self.setSortingEnabled(True)
self.selectionModel().currentRowChanged.connect(self._model.current_changed)
self.preserve_selected_books = PreserveSelection(self)
self.preserve_state = partial(PreserveViewState, self)
# {{{ Column Header setup
self.can_add_columns = True
@ -788,6 +807,23 @@ class BooksView(QTableView): # {{{
ans.append(i)
return ans
@dynamic_property
def current_id(self):
def fget(self):
try:
return self.model().id(self.currentIndex())
except:
pass
return None
def fset(self, val):
if val is None: return
m = self.model()
for row in xrange(m.rowCount(QModelIndex())):
if m.id(row) == val:
self.set_current_row(row, select=False)
break
return property(fget=fget, fset=fset)
def close(self):
self._model.close()

View File

@ -29,10 +29,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
(_('Only on send'), 'on_send'),
(_('Automatic management'), 'on_connect')]
r('manage_device_metadata', prefs, choices=choices)
if gui.device_manager.is_device_connected:
self.opt_manage_device_metadata.setEnabled(False)
self.opt_manage_device_metadata.setToolTip(
_('Cannot change metadata management while a device is connected'))
self.mm_label.setText('Metadata management (disabled while '
'device connected)')
self.send_template.changed_signal.connect(self.changed_signal.emit)

View File

@ -15,7 +15,7 @@
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label_4">
<widget class="QLabel" name="mm_label">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Preferred">
<horstretch>0</horstretch>