Merge from trunk

This commit is contained in:
Charles Haley 2011-04-14 10:56:58 +01:00
commit f10aaf23e9
11 changed files with 274 additions and 94 deletions

View File

@ -0,0 +1,36 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
    """News recipe for the halloassen.nl feeds (language ``nl``)."""

    # Identification / catalogue metadata.
    title = u'Hallo Assen'
    __author__ = 'Reijndert'
    version = 1
    language = 'nl'
    country = 'NL'
    category = u'Nieuws'
    cover_url = 'http://www.halloassen.nl/multimedia/halloassen/archive/00002/HalloAssen_2518a.gif'

    # Download limits and date formatting.
    oldest_article = 180
    max_articles_per_feed = 100
    timefmt = ' %Y-%m-%d (%a)'

    # Page clean-up: the <div> elements listed in keep_only_tags and
    # remove_tags are matched by class / id attribute.
    no_stylesheets = True
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'photoFrame'}},
        {'name': 'div', 'attrs': {'class': 'textContent'}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': 'articleLinks'}},
        {'name': 'div', 'attrs': {'class': 'categories clearfix'}},
        {'name': 'div', 'attrs': {'id': 'rating'}},
        {'name': 'div', 'attrs': {'id': 'comments'}},
    ]

    # (section title, feed URL) pairs, one per line.
    feeds = [
        (u'Ons Nieuws', u'http://feeds.feedburner.com/halloassen/onsnieuws'),
        (u'Politie', u'http://www.halloassen.nl/rss/?c=37'),
        (u'Rechtbank', u'http://www.halloassen.nl/rss/?c=39'),
        (u'Justitie', u'http://www.halloassen.nl/rss/?c=36'),
        (u'Evenementen', u'http://www.halloassen.nl/rss/?c=34'),
        (u'Cultuur', u'http://www.halloassen.nl/rss/?c=32'),
        (u'Politiek', u'http://www.halloassen.nl/rss/?c=38'),
        (u'Economie', u'http://www.halloassen.nl/rss/?c=33'),
    ]

    # Written as an escaped literal so indenting the class body cannot change
    # the stylesheet text.
    extra_css = '\nbody {font-family: verdana, arial, helvetica, geneva, sans-serif;}\n'

View File

@ -3,7 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.05'
__date__ = '9, February 2011'
__date__ = '13, April 2011'
'''
http://www.weblogssl.com/
'''
@ -19,7 +19,7 @@ class weblogssl(BasicNewsRecipe):
category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1.5
oldest_article = 1
max_articles_per_feed = 100
encoding = 'utf-8'
use_embedded_content = False
@ -28,50 +28,52 @@ class weblogssl(BasicNewsRecipe):
no_stylesheets = True
# Si no se quiere recuperar todos los blogs se puede suprimir la descarga del que se desee poniendo
# un caracter # por delante, es decir, # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
# haría que no se descargase Applesfera. OJO: El último feed no debe llevar la coma al final
# un caracter # por delante, es decir, # ,(u'Applesfera', u'http://feeds.weblogssl.com/applesfera')
# haría que no se descargase Applesfera.
feeds = [
(u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
(u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
(u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
(u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
(u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
(u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
(u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
(u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
(u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
(u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
(u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
(u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
(u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
(u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
(u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
(u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
(u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
(u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
(u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
(u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
(u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
(u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
(u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
(u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
(u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
(u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
(u'Xataka', u'http://feeds.weblogssl.com/xataka2')
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon')
,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
,(u'Genbeta', u'http://feeds.weblogssl.com/genbeta')
,(u'Genbeta Dev', u'http://feeds.weblogssl.com/genbetadev')
,(u'Applesfera', u'http://feeds.weblogssl.com/applesfera')
,(u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra')
,(u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred')
,(u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine')
,(u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2')
,(u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica')
,(u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero')
,(u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco')
,(u'Pop rosa', u'http://feeds.weblogssl.com/poprosa')
,(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom')
,(u'Fandemia', u'http://feeds.weblogssl.com/fandemia')
,(u'Noctamina', u'http://feeds.weblogssl.com/noctamina')
,(u'Tendencias', u'http://feeds.weblogssl.com/trendencias')
,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
,(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar')
,(u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion')
,(u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera')
,(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia')
,(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica')
,(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg')
,(u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora')
,(u'Mensencia', u'http://feeds.weblogssl.com/mensencia')
,(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas')
,(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion')
,(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1')
,(u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto')
,(u'Motorpasi\xf3n Futuro', u'http://feeds.weblogssl.com/motorpasionfuturo')
,(u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol')
,(u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites')
,(u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar')
,(u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2')
,(u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos')
,(u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme')
,(u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
]
@ -102,3 +104,4 @@ class weblogssl(BasicNewsRecipe):
video_yt['src'] = fuente3 + '/0.jpg'
return soup

View File

@ -81,6 +81,11 @@ class WallStreetJournal(BasicNewsRecipe):
feeds.append((title, articles))
return feeds
def abs_wsj_url(self, href):
    """Return ``href`` as an absolute URL on ``online.wsj.com``.

    :param href: link target taken from an ``<a>`` tag; may already be
                 absolute, protocol-relative (``//host/path``) or relative.
    :return: ``href`` unchanged when it already carries an ``http://`` or
             ``https://`` scheme, otherwise an absolute URL.
    """
    # Test for a real scheme rather than the bare prefix 'http', so that a
    # relative path such as 'httpdocs/x.html' is not mistaken for absolute.
    if href.startswith(('http://', 'https://')):
        return href
    # Protocol-relative links already name their host; only add the scheme.
    if href.startswith('//'):
        return 'http:' + href
    # Make sure exactly one '/' separates the host from the path.
    if not href.startswith('/'):
        href = '/' + href
    return 'http://online.wsj.com' + href
def parse_index(self):
soup = self.wsj_get_index()
@ -99,14 +104,14 @@ class WallStreetJournal(BasicNewsRecipe):
pageone = a['href'].endswith('pageone')
if pageone:
title = 'Front Section'
url = 'http://online.wsj.com' + a['href']
url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url)
title = "What's News"
url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url)
else:
title = self.tag_to_string(a)
url = 'http://online.wsj.com' + a['href']
url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url)
return feeds
@ -163,7 +168,7 @@ class WallStreetJournal(BasicNewsRecipe):
title = self.tag_to_string(a).strip() + ' [%s]'%meta
else:
title = self.tag_to_string(a).strip()
url = 'http://online.wsj.com'+a['href']
url = self.abs_wsj_url(a['href'])
desc = ''
for p in container.findAll('p'):
desc = self.tag_to_string(p)

View File

@ -54,6 +54,9 @@ class ANDROID(USBMS):
0x6877 : [0x0400],
},
# Viewsonic
0x0489 : { 0xc001 : [0x0226] },
# Acer
0x502 : { 0x3203 : [0x0100]},

View File

@ -6,8 +6,8 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, textwrap, sys
from copy import deepcopy
import os, textwrap, sys, operator
from copy import deepcopy, copy
from lxml import etree
@ -149,9 +149,65 @@ class TextBlock(etree.XSLTExtension):
self.root = root
self.parent = root
self.add_text_to = (self.parent, 'text')
self.fix_deep_nesting(node)
for child in node:
self.process_child(child)
def fix_deep_nesting(self, node):
    """Flatten ``Span`` elements below ``node`` when nesting is very deep.

    Nothing is changed unless some ``Span`` descendant sits at depth 500 or
    more, counted from the root of the tree. When that threshold is reached,
    every ``Span`` whose depth is 3 or more is lifted, together with the
    siblings that follow it, out of its parent and re-inserted directly after
    that parent. Spans are processed deepest first. A lifted ``Span`` inherits
    the attributes of a ``Span`` parent unless it defines them itself.

    :param node: element whose ``Span`` descendants are inspected; it must
                 provide ``xpath``, ``getparent``, ``index``, ``remove`` and
                 ``insert``.
    :return: ``None``; the tree is modified in place.
    """
    def nesting_depth(elem):
        # Number of elements on the path from elem up to the root, inclusive.
        levels = 1
        ancestor = elem.getparent()
        while ancestor is not None:
            levels += 1
            ancestor = ancestor.getparent()
        return levels

    # Query and measure once; the tree is not modified before the threshold
    # check, so the same list serves both the check and the flattening.
    ranked = [(nesting_depth(span), span)
              for span in node.xpath('descendant::Span')]
    deepest = max([1] + [item[0] for item in ranked])
    if deepest < 500:
        return

    self.log.warn('Found deeply nested spans. Flattening.')

    # Deepest first: each span moves up one level, and it is carried further
    # up when its former ancestors are processed later in this loop.
    ranked.sort(key=operator.itemgetter(0), reverse=True)

    for span_depth, span in ranked:
        if span_depth < 3:
            continue
        parent = span.getparent()
        grandparent = parent.getparent()
        start = parent.index(span)
        parent_pos = grandparent.index(parent)

        # The span and everything after it inside the parent move together.
        moving = list(parent)[start:]

        # Text that followed the parent must now follow the last moved child.
        last = moving[-1]
        last.tail = (last.tail or '') + (parent.tail or '')
        parent.tail = ''

        inherited = dict(parent.attrib) if parent.tag == 'Span' else {}
        for child in moving:
            parent.remove(child)
            if inherited and child.tag == "Span":
                # The child's own attributes win over inherited ones.
                merged = dict(inherited)
                merged.update(child.attrib)
                child.attrib.update(merged)

        # Insert in reverse at a fixed position to keep document order.
        for child in reversed(moving):
            grandparent.insert(parent_pos + 1, child)
def add_text(self, text):
if text:
if getattr(self.add_text_to[0], self.add_text_to[1]) is None:

View File

@ -395,8 +395,8 @@ if __name__ == '__main__': # tests {{{
# unknown to Amazon
{'identifiers':{'isbn': '9780307459671'},
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
[title_test('The Invisible Gorilla',
exact=True), authors_test(['Christopher F. Chabris', 'Daniel Simons'])]
),
@ -404,7 +404,7 @@ if __name__ == '__main__': # tests {{{
{'title':'Learning Python',
'authors':['Lutz']},
[title_test('Learning Python',
exact=True), authors_test(['Mark Lutz'])
exact=True), authors_test(['Mark J. Lutz', 'David Ascher'])
]
),

View File

@ -218,11 +218,11 @@ def test_identify_plugin(name, tests): # {{{
'')+'-%s-cover.jpg'%sanitize_file_name2(mi.title.replace(' ',
'_')))
with open(cover, 'wb') as f:
f.write(cdata)
f.write(cdata[-1])
prints('Cover downloaded to:', cover)
if len(cdata) < 10240:
if len(cdata[-1]) < 10240:
prints('Downloaded cover too small')
raise SystemExit(1)

View File

@ -463,9 +463,9 @@ class MobiMLizer(object):
text = COLLAPSE.sub(' ', elem.text)
valign = style['vertical-align']
not_baseline = valign in ('super', 'sub', 'text-top',
'text-bottom') or (
'text-bottom', 'top', 'bottom') or (
isinstance(valign, (float, int)) and abs(valign) != 0)
issup = valign in ('super', 'text-top') or (
issup = valign in ('super', 'text-top', 'top') or (
isinstance(valign, (float, int)) and valign > 0)
vtag = 'sup' if issup else 'sub'
if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
@ -484,6 +484,7 @@ class MobiMLizer(object):
parent = bstate.para if bstate.inline is None else bstate.inline
if parent is not None:
vtag = etree.SubElement(parent, XHTML(vtag))
vtag = etree.SubElement(vtag, XHTML('small'))
# Add anchors
for child in vbstate.body:
if child is not vbstate.para:

View File

@ -6,9 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time
from functools import partial
from PyQt4.Qt import Qt, QMenu
from PyQt4.Qt import Qt, QMenu, QAction, pyqtSignal
from calibre.constants import isosx
from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
@ -18,6 +17,19 @@ from calibre.utils.config import prefs
from calibre.ptempfile import PersistentTemporaryFile
from calibre.gui2.actions import InterfaceAction
class HistoryAction(QAction):
    """Menu action that remembers an id and re-emits it when triggered."""

    # Emitted with the stored id whenever this action is triggered.
    view_historical = pyqtSignal(object)

    def __init__(self, id_, title, parent):
        """Create the action labelled ``title`` under ``parent`` for ``id_``."""
        QAction.__init__(self, title, parent)
        self.id = id_
        # Translate the plain ``triggered`` signal into one carrying the id.
        self.triggered.connect(self._triggered)

    def _triggered(self):
        """Forward the stored id through ``view_historical``."""
        self.view_historical.emit(self.id)
class ViewAction(InterfaceAction):
name = 'View'
@ -28,18 +40,51 @@ class ViewAction(InterfaceAction):
self.persistent_files = []
self.qaction.triggered.connect(self.view_book)
self.view_menu = QMenu()
self.view_menu.addAction(_('View'), partial(self.view_book, False))
ac = self.view_menu.addAction(_('View specific format'))
ac.setShortcut((Qt.ControlModifier if isosx else Qt.AltModifier)+Qt.Key_V)
ac = self.view_specific_action = QAction(_('View specific format'),
self.gui)
self.qaction.setMenu(self.view_menu)
ac.setShortcut((Qt.ControlModifier if isosx else Qt.AltModifier)+Qt.Key_V)
ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)
self.view_menu.addSeparator()
ac = self.view_action = QAction(self.qaction.icon(),
self.qaction.text(), self.gui)
ac.triggered.connect(self.view_book)
ac = self.create_action(spec=(_('Read a random book'), 'catalog.png',
None, None), attr='action_pick_random')
ac.triggered.connect(self.view_random)
self.view_menu.addAction(ac)
ac = self.clear_history_action = QAction(
_('Clear recently viewed list'), self.gui)
ac.triggered.connect(self.clear_history)
def initialization_complete(self):
    """Build the view menu for the GUI's current database."""
    current_db = self.gui.current_db
    self.build_menus(current_db)
def build_menus(self, db):
    """Rebuild the view menu from scratch.

    The fixed entries are always added. When ``db.prefs`` holds a non-empty
    ``'gui_view_history'`` list of ``(id, title)`` pairs, one
    ``HistoryAction`` per pair is appended, followed by the clear-history
    action.

    :param db: object whose ``prefs`` mapping holds the view history.
    """
    menu = self.view_menu
    menu.clear()
    menu.addAction(self.qaction)
    menu.addAction(self.view_specific_action)
    menu.addSeparator()
    menu.addAction(self.action_pick_random)
    self.history_actions = []

    recent = db.prefs.get('gui_view_history', [])
    if not recent:
        # No history: no history section and no clear-history entry.
        return

    menu.addSeparator()
    for book_id, book_title in recent:
        entry = HistoryAction(book_id, book_title, menu)
        menu.addAction(entry)
        entry.view_historical.connect(self.view_historical)
    menu.addSeparator()
    menu.addAction(self.clear_history_action)
def clear_history(self):
    """Empty the stored view history and rebuild the view menu."""
    current = self.gui.current_db
    current.prefs['gui_view_history'] = []
    self.build_menus(current)
def view_historical(self, id_):
    """Open the single book identified by ``id_``."""
    ids = [id_]
    self._view_calibre_books(ids)
def library_changed(self, db):
    """Rebuild the view menu for ``db``."""
    self.build_menus(db)
def location_selected(self, loc):
enabled = loc == 'library'
@ -47,15 +92,17 @@ class ViewAction(InterfaceAction):
action.setEnabled(enabled)
def view_format(self, row, format):
fmt_path = self.gui.library_view.model().db.format_abspath(row, format)
if fmt_path:
self._view_file(fmt_path)
id_ = self.gui.library_view.model().id(row)
self.view_format_by_id(id_, format)
def view_format_by_id(self, id_, format):
fmt_path = self.gui.library_view.model().db.format_abspath(id_, format,
db = self.gui.current_db
fmt_path = db.format_abspath(id_, format,
index_is_id=True)
if fmt_path:
title = db.title(id_, index_is_id=True)
self._view_file(fmt_path)
self.update_history([(id_, title)])
def book_downloaded_for_viewing(self, job):
if job.failed:
@ -162,6 +209,54 @@ class ViewAction(InterfaceAction):
self.gui.iactions['Choose Library'].pick_random()
self._view_books([self.gui.library_view.currentIndex()])
def _view_calibre_books(self, ids):
    """Open each book in ``ids`` and record the opened books in the history.

    For every id the available formats are looked up in the current
    database. The first entry of ``prefs['input_format_order']`` that the
    book has is used, falling back to the first format listed for the book.
    Books whose lookup fails are reported and removed from the history;
    books without formats are reported and skipped.

    :param ids: iterable of book ids.
    """
    db = self.gui.current_db
    views = []
    for id_ in ids:
        try:
            formats = db.formats(id_, index_is_id=True)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            error_dialog(self.gui, _('Cannot view'),
                _('This book no longer exists in your library'), show=True)
            self.update_history([], remove=set([id_]))
            continue

        title = db.title(id_, index_is_id=True)
        if not formats:
            error_dialog(self.gui, _('Cannot view'),
                _('%s has no available formats.')%(title,), show=True)
            continue

        # ``formats`` is a comma separated string at this point.
        formats = formats.upper().split(',')
        fmt = formats[0]
        for preferred in prefs['input_format_order']:
            if preferred in formats:
                fmt = preferred
                break
        views.append((id_, title))
        self.view_format_by_id(id_, fmt)

    self.update_history(views)
def update_history(self, views, remove=frozenset()):
    """Update the ``'gui_view_history'`` preference of the current database.

    :param views: list of ``(id, title)`` pairs to put at the front of the
                  history. Entries are de-duplicated by title, keeping the
                  first occurrence.
    :param remove: container of ids whose entries are dropped.

    The stored list is capped at 10 entries, and the menus are rebuilt after
    each of the two steps that actually runs.
    """
    db = self.gui.current_db
    key = 'gui_view_history'

    if views:
        merged = []
        known_titles = set()
        for book_id, title in views + db.prefs.get(key, []):
            if title in known_titles:
                continue
            known_titles.add(title)
            merged.append((book_id, title))
        db.prefs[key] = merged[:10]
        self.build_menus(db)

    if remove:
        kept = [item for item in db.prefs.get(key, [])
                if item[0] not in remove]
        db.prefs[key] = kept[:10]
        self.build_menus(db)
def _view_books(self, rows):
if not rows or len(rows) == 0:
self._launch_viewer()
@ -171,28 +266,8 @@ class ViewAction(InterfaceAction):
return
if self.gui.current_view() is self.gui.library_view:
for row in rows:
if hasattr(row, 'row'):
row = row.row()
formats = self.gui.library_view.model().db.formats(row)
title = self.gui.library_view.model().db.title(row)
if not formats:
error_dialog(self.gui, _('Cannot view'),
_('%s has no available formats.')%(title,), show=True)
continue
formats = formats.upper().split(',')
in_prefs = False
for format in prefs['input_format_order']:
if format in formats:
in_prefs = True
self.view_format(row, format)
break
if not in_prefs:
self.view_format(row, formats[0])
ids = list(map(self.gui.library_view.model().id, rows))
self._view_calibre_books(ids)
else:
paths = self.gui.current_view().model().paths(rows)
for path in paths:

View File

@ -202,7 +202,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.changed_signal.emit()
def refresh_gui(self, gui):
gui.emailer.calculate_rate_limit()
from calibre.gui2.email import gui_sendmail
gui_sendmail.calculate_rate_limit()
if __name__ == '__main__':

View File

@ -20,9 +20,9 @@ What formats does |app| support conversion to/from?
|app| supports the conversion of many input formats to many output formats.
It can convert every input format in the following list, to every output format.
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, SNB, TCR, TXT
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers
@ -30,7 +30,7 @@ It can convert every input format in the following list, to every output format.
What are the best source formats to convert?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In order of decreasing preference: LIT, MOBI, EPUB, HTML, PRC, RTF, PDB, TXT, PDF
In order of decreasing preference: LIT, MOBI, EPUB, FB2, HTML, PRC, RTF, PDB, TXT, PDF
Why does the PDF conversion lose some images/tables?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~