Merge from trunk

This commit is contained in:
Charles Haley 2011-01-25 18:56:05 +00:00
commit c47013ef46
11 changed files with 122 additions and 35 deletions

View File

@ -62,6 +62,18 @@ div.description {
text-indent: 1em; text-indent: 1em;
} }
/*
* Attempt to minimize widows and orphans by grouping logically related chunks.
* Recommended for iPad; to enable, uncomment the div.logical_group rule below.
* There are some reports of problems with Sony e-readers, presumably ADE engines.
*/
/*
div.logical_group {
display:inline-block;
width:100%;
}
*/
p.date_index { p.date_index {
font-size:x-large; font-size:x-large;
text-align:center; text-align:center;

View File

@ -1,17 +1,67 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
description = 'Periódico gratuito en español - v0.5 - 25 Jan 2011'
'''
www.20minutos.es
'''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1295310874(BasicNewsRecipe): class AdvancedUserRecipe1294946868(BasicNewsRecipe):
title = u'20 Minutos (Boletin)'
__author__ = 'Luis Hernandez' title = u'20 Minutos'
description = 'Periódico gratuito en español' publisher = u'Grupo 20 Minutos'
__author__ = u'Luis Hernández'
description = u'Periódico gratuito en español'
cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif' cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
language = 'es'
oldest_article = 2 oldest_article = 5
max_articles_per_feed = 50 max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
encoding = 'ISO-8859-1'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
keep_only_tags = [dict(name='div', attrs={'id':['content']})
,dict(name='div', attrs={'class':['boxed','description','lead','article-content']})
,dict(name='span', attrs={'class':['photo-bar']})
,dict(name='ul', attrs={'class':['article-author']})
]
remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']})
remove_tags_after = dict(name='div' , attrs={'class':['related-news','col']})
remove_tags = [
dict(name='ol', attrs={'class':['navigation',]})
,dict(name='span', attrs={'class':['action']})
,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col']})
,dict(name='div', attrs={'id':['twitter-destacados']})
,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
]
feeds = [
(u'Portada' , u'http://www.20minutos.es/rss/')
,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/')
,(u'Internacional' , u'http://www.20minutos.es/rss/internacional/')
,(u'Economia' , u'http://www.20minutos.es/rss/economia/')
,(u'Deportes' , u'http://www.20minutos.es/rss/deportes/')
,(u'Tecnologia' , u'http://www.20minutos.es/rss/tecnologia/')
,(u'Gente - TV' , u'http://www.20minutos.es/rss/gente-television/')
,(u'Motor' , u'http://www.20minutos.es/rss/motor/')
,(u'Salud' , u'http://www.20minutos.es/rss/belleza-y-salud/')
,(u'Viajes' , u'http://www.20minutos.es/rss/viajes/')
,(u'Vivienda' , u'http://www.20minutos.es/rss/vivienda/')
,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/')
,(u'Cine' , u'http://www.20minutos.es/rss/cine/')
,(u'Musica' , u'http://www.20minutos.es/rss/musica/')
,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/')
]
feeds = [(u'VESPERTINO', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
, (u'DEPORTES', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss')
, (u'CULTURA', u'http://www.20minutos.es/rss/ocio/')
, (u'TV', u'http://20minutos.feedsportal.com/c/32489/f/490877/index.rss')
]

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
newyorker.com newyorker.com
''' '''
@ -54,10 +54,10 @@ class NewYorker(BasicNewsRecipe):
,dict(attrs={'id':['show-header','show-footer'] }) ,dict(attrs={'id':['show-header','show-footer'] })
] ]
remove_attributes = ['lang'] remove_attributes = ['lang']
feeds = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')] feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/rss/feeds/everything.xml')]
def print_version(self, url): def print_version(self, url):
return url + '?printable=true' return 'http://www.newyorker.com' + url + '?printable=true'
def image_url_processor(self, baseurl, url): def image_url_processor(self, baseurl, url):
return url.strip() return url.strip()

View File

@ -498,7 +498,7 @@ class NYTimes(BasicNewsRecipe):
for lidiv in div.findAll('li'): for lidiv in div.findAll('li'):
if not skipping: if not skipping:
self.handle_article(lidiv) self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans) return self.filter_ans(self.ans)
@ -609,7 +609,7 @@ class NYTimes(BasicNewsRecipe):
if article_date < self.earliest_date: if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str) self.log("Skipping article dated %s" % date_str)
return None return None
#all articles are from today, no need to print the date on every page #all articles are from today, no need to print the date on every page
try: try:
if not self.webEdition: if not self.webEdition:
@ -631,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('") refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
refend = reflinkstring.find(".html", refstart) + len(".html") refend = reflinkstring.find(".html", refstart) + len(".html")
reflinkstring = reflinkstring[refstart:refend] reflinkstring = reflinkstring[refstart:refend]
popuppage = self.browser.open(reflinkstring) popuppage = self.browser.open(reflinkstring)
popuphtml = popuppage.read() popuphtml = popuppage.read()
popuppage.close() popuppage.close()
@ -640,7 +640,7 @@ class NYTimes(BasicNewsRecipe):
year = str(st.tm_year) year = str(st.tm_year)
month = "%.2d" % st.tm_mon month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday day = "%.2d" % st.tm_mday
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4] highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
popupSoup = BeautifulSoup(popuphtml) popupSoup = BeautifulSoup(popuphtml)
highResTag = popupSoup.find('img', {'src':highResImageLink}) highResTag = popupSoup.find('img', {'src':highResImageLink})
@ -659,9 +659,9 @@ class NYTimes(BasicNewsRecipe):
imageTag['height'] = newHeight imageTag['height'] = newHeight
except: except:
self.log("Error setting the src width and height parameters") self.log("Error setting the src width and height parameters")
except Exception as e: except Exception:
self.log("Error pulling high resolution images") self.log("Error pulling high resolution images")
try: try:
#remove "Related content" bar #remove "Related content" bar
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']}) runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
@ -674,8 +674,8 @@ class NYTimes(BasicNewsRecipe):
hline.extract() hline.extract()
except: except:
self.log("Error removing related content bar") self.log("Error removing related content bar")
try: try:
#in case pulling images failed, delete the enlarge this text #in case pulling images failed, delete the enlarge this text
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'}) enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})

View File

@ -17,7 +17,7 @@ from lxml import etree
import cssutils import cssutils
from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \ from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
DC_NSES, OPF DC_NSES, OPF, xml2text
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, \ from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, \
@ -423,7 +423,7 @@ class OEBReader(object):
path, frag = urldefrag(href) path, frag = urldefrag(href)
if path not in self.oeb.manifest.hrefs: if path not in self.oeb.manifest.hrefs:
continue continue
title = ' '.join(xpath(anchor, './/text()')) title = xml2text(anchor)
title = COLLAPSE_RE.sub(' ', title.strip()) title = COLLAPSE_RE.sub(' ', title.strip())
if href not in titles: if href not in titles:
order.append(href) order.append(href)

View File

@ -550,6 +550,14 @@ def choose_dir(window, name, title, default_dir='~'):
if dir: if dir:
return dir[0] return dir[0]
def choose_osx_app(window, name, title, default_dir='/Applications'):
    '''
    Show a FileDialog in ``QFileDialog.ExistingFile`` mode and hand back
    the user's selection (presumably an OS X application, going by the
    function name and the ``/Applications`` default -- confirm with callers).

    :param window: Passed to the dialog as its ``parent``
    :param name: Passed through to FileDialog as ``name``
    :param title: Passed through to FileDialog as ``title``
    :param default_dir: Passed through to FileDialog as ``default_dir``
    :return: The value returned by ``FileDialog.get_files()`` when it is
             truthy, otherwise ``None``
    '''
    dialog = FileDialog(
        title=title,
        parent=window,
        name=name,
        mode=QFileDialog.ExistingFile,
        default_dir=default_dir,
    )
    selection = dialog.get_files()
    # Detach the dialog from its parent once the selection has been read
    dialog.setParent(None)
    return selection if selection else None
def choose_files(window, name, title, def choose_files(window, name, title,
filters=[], all_files=True, select_only_single_file=False): filters=[], all_files=True, select_only_single_file=False):
''' '''

View File

@ -9,7 +9,7 @@ import os, datetime
from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
from calibre.gui2 import error_dialog, gprefs from calibre.gui2 import error_dialog
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
from calibre import strftime from calibre import strftime
from calibre.gui2.actions import InterfaceAction from calibre.gui2.actions import InterfaceAction
@ -165,10 +165,12 @@ class FetchAnnotationsAction(InterfaceAction):
ka_soup.insert(0,divTag) ka_soup.insert(0,divTag)
return ka_soup return ka_soup
'''
def mark_book_as_read(self,id): def mark_book_as_read(self,id):
read_tag = gprefs.get('catalog_epub_mobi_read_tag') read_tag = gprefs.get('catalog_epub_mobi_read_tag')
if read_tag: if read_tag:
self.db.set_tags(id, [read_tag], append=True) self.db.set_tags(id, [read_tag], append=True)
'''
def canceled(self): def canceled(self):
self.pd.hide() self.pd.hide()
@ -201,10 +203,12 @@ class FetchAnnotationsAction(InterfaceAction):
# Update library comments # Update library comments
self.db.set_comment(id, mi.comments) self.db.set_comment(id, mi.comments)
'''
# Update 'read' tag except for Catalogs/Clippings # Update 'read' tag except for Catalogs/Clippings
if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD: if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD:
if not set(mi.tags).intersection(ignore_tags): if not set(mi.tags).intersection(ignore_tags):
self.mark_book_as_read(id) self.mark_book_as_read(id)
'''
# Add bookmark file to id # Add bookmark file to id
self.db.add_format_with_hooks(id, bm.value.bookmark_extension, self.db.add_format_with_hooks(id, bm.value.bookmark_extension,

View File

@ -335,7 +335,7 @@ class PluginWidget(QWidget,Ui_Form):
''' '''
return return
'''
if new_state == 0: if new_state == 0:
# unchecked # unchecked
self.merge_source_field.setEnabled(False) self.merge_source_field.setEnabled(False)
@ -348,6 +348,7 @@ class PluginWidget(QWidget,Ui_Form):
self.merge_before.setEnabled(True) self.merge_before.setEnabled(True)
self.merge_after.setEnabled(True) self.merge_after.setEnabled(True)
self.include_hr.setEnabled(True) self.include_hr.setEnabled(True)
'''
def header_note_source_field_changed(self,new_index): def header_note_source_field_changed(self,new_index):
''' '''

View File

@ -638,8 +638,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
time.sleep(2) time.sleep(2)
if mb is not None:
mb.flush()
self.hide_windows() self.hide_windows()
return True return True

View File

@ -42,6 +42,7 @@ class MetadataBackup(Thread): # {{{
def stop(self): def stop(self):
self.keep_running = False self.keep_running = False
self.flush()
# Break cycles so that this object doesn't hold references to db # Break cycles so that this object doesn't hold references to db
self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \ self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \
self.set_dirtied = self.db = None self.set_dirtied = self.db = None
@ -57,7 +58,10 @@ class MetadataBackup(Thread): # {{{
except: except:
# Happens during interpreter shutdown # Happens during interpreter shutdown
break break
if not self.keep_running:
break
self.in_limbo = id_
try: try:
path, mi = self.get_metadata_for_dump(id_) path, mi = self.get_metadata_for_dump(id_)
except: except:
@ -72,10 +76,10 @@ class MetadataBackup(Thread): # {{{
continue continue
# at this point the dirty indication is off # at this point the dirty indication is off
if mi is None: if mi is None:
continue continue
self.in_limbo = id_ if not self.keep_running:
break
# Give the GUI thread a chance to do something. Python threads don't # Give the GUI thread a chance to do something. Python threads don't
# have priorities, so this thread would naturally keep the processor # have priorities, so this thread would naturally keep the processor
@ -89,6 +93,9 @@ class MetadataBackup(Thread): # {{{
traceback.print_exc() traceback.print_exc()
continue continue
if not self.keep_running:
break
time.sleep(0.1) # Give the GUI thread a chance to do something time.sleep(0.1) # Give the GUI thread a chance to do something
try: try:
self.do_write(path, raw) self.do_write(path, raw)
@ -102,7 +109,8 @@ class MetadataBackup(Thread): # {{{
prints('Failed to write backup metadata for id:', id_, prints('Failed to write backup metadata for id:', id_,
'again, giving up') 'again, giving up')
continue continue
self.in_limbo = None
self.in_limbo = None
def flush(self): def flush(self):
'Used during shutdown to ensure that a dirtied book is not missed' 'Used during shutdown to ensure that a dirtied book is not missed'
@ -111,6 +119,7 @@ class MetadataBackup(Thread): # {{{
self.db.dirtied([self.in_limbo]) self.db.dirtied([self.in_limbo])
except: except:
traceback.print_exc() traceback.print_exc()
self.in_limbo = None
def write(self, path, raw): def write(self, path, raw):
with lopen(path, 'wb') as f: with lopen(path, 'wb') as f:

View File

@ -1820,6 +1820,9 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
self.booksByTitle_noSeriesPrefix = nspt self.booksByTitle_noSeriesPrefix = nspt
# Loop through the books by title # Loop through the books by title
# Generate one divRunningTag per initial letter in order to minimize
# widows and orphans on readers that can handle large <div> elements
# styled as inline-block
title_list = self.booksByTitle title_list = self.booksByTitle
if not self.useSeriesPrefixInTitlesSection: if not self.useSeriesPrefixInTitlesSection:
title_list = self.booksByTitle_noSeriesPrefix title_list = self.booksByTitle_noSeriesPrefix
@ -1832,7 +1835,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
divTag.insert(dtc, divRunningTag) divTag.insert(dtc, divRunningTag)
dtc += 1 dtc += 1
divRunningTag = Tag(soup, 'div') divRunningTag = Tag(soup, 'div')
divRunningTag['style'] = 'display:inline-block;width:100%' divRunningTag['class'] = "logical_group"
drtc = 0 drtc = 0
current_letter = self.letter_or_symbol(book['title_sort'][0]) current_letter = self.letter_or_symbol(book['title_sort'][0])
pIndexTag = Tag(soup, "p") pIndexTag = Tag(soup, "p")
@ -1954,6 +1957,8 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
drtc = 0 drtc = 0
# Loop through booksByAuthor # Loop through booksByAuthor
# Each author/books group goes in an openingTag div (first) or
# a runningTag div (subsequent)
book_count = 0 book_count = 0
current_author = '' current_author = ''
current_letter = '' current_letter = ''
@ -1977,7 +1982,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
current_letter = self.letter_or_symbol(book['author_sort'][0].upper()) current_letter = self.letter_or_symbol(book['author_sort'][0].upper())
author_count = 0 author_count = 0
divOpeningTag = Tag(soup, 'div') divOpeningTag = Tag(soup, 'div')
divOpeningTag['style'] = 'display:inline-block;width:100%' divOpeningTag['class'] = "logical_group"
dotc = 0 dotc = 0
pIndexTag = Tag(soup, "p") pIndexTag = Tag(soup, "p")
pIndexTag['class'] = "letter_index" pIndexTag['class'] = "letter_index"
@ -2001,7 +2006,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
# Create a divRunningTag for the rest of the authors in this letter # Create a divRunningTag for the rest of the authors in this letter
divRunningTag = Tag(soup, 'div') divRunningTag = Tag(soup, 'div')
divRunningTag['style'] = 'display:inline-block;width:100%' divRunningTag['class'] = "logical_group"
drtc = 0 drtc = 0
non_series_books = 0 non_series_books = 0