Merge from trunk

This commit is contained in:
Charles Haley 2011-02-11 10:44:40 +00:00
commit 914ddaae86
12 changed files with 993 additions and 645 deletions

View File

@ -5,8 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon'
__version__ = 'v0.05'
__date__ = '07, December 2010'
__version__ = 'v0.07'
__date__ = '06, February 2011'
'''
elperiodicodearagon.com
'''
@ -38,22 +38,26 @@ class elperiodicodearagon(BasicNewsRecipe):
,'publisher' : publisher
}
feeds = [(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
(u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
(u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')]
feeds = [
(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
(u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
(u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
]
extra_css = '''
h3{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
dd{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
.columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
img{margin-bottom: 0.4em}
'''
remove_attributes = ['height','width']
@ -82,6 +86,7 @@ class elperiodicodearagon(BasicNewsRecipe):
dict(name='a', attrs={'class':'AvisoComentario'}),
dict(name='div', attrs={'class':'CajaAvisoComentario'}),
dict(name='div', attrs={'class':'navegaNoticias'}),
dict(name='div', attrs={'class':'Mensaje'}),
dict(name='div', attrs={'id':'PaginadorDiCom'}),
dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
dict(name='div', attrs={'id':'CintilloComentario'}),
@ -107,3 +112,15 @@ class elperiodicodearagon(BasicNewsRecipe):
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
]
# Para sustituir el video incrustado de YouTube por una imagen
def preprocess_html(self, soup):
    """Replace embedded YouTube players with a still image of the video.

    Every ``<iframe title="YouTube video player">`` found in *soup* is
    renamed to ``<img>`` and its ``src`` is rewritten from the
    ``http://www.youtube.com/embed/<id>`` form to
    ``http://img.youtube.com/vi/<id>/0.jpg``.

    :param soup: parsed article; must provide ``findAll`` and yield tags
                 supporting ``get``, item assignment and a ``name`` attribute.
    :return: the same *soup* object, modified in place.
    """
    for video_yt in soup.findAll('iframe', {'title': 'YouTube video player'}):
        source = video_yt.get('src')
        if not source:
            # Without a source there is no thumbnail to point at; leave the
            # tag untouched instead of failing on the whole article.
            continue
        # Player options such as "?rel=0" are not part of the video id and
        # would corrupt the thumbnail path, so drop any query string.
        source = source.split('?', 1)[0]
        video_yt.name = 'img'
        video_yt['src'] = source.replace(
            'http://www.youtube.com/embed/',
            'http://img.youtube.com/vi/') + '/0.jpg'
    return soup

View File

@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
blogs.tedneward.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class InteroperabilityHappens(BasicNewsRecipe):
    """Recipe for Ted Neward's blog at blogs.tedneward.com.

    The class is purely declarative: it only sets recipe attributes and
    defines no methods of its own.
    """

    # --- identification -------------------------------------------------
    title = 'Interoperability Happens'
    __author__ = 'Darko Miletic'
    description = 'Tech blog by Ted Neward'
    language = 'en'
    publication_type = 'blog'

    # --- feed retrieval -------------------------------------------------
    feeds = [
        (u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss'),
    ]
    oldest_article = 15
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = True

    # --- presentation ---------------------------------------------------
    no_stylesheets = True
    extra_css = "\nbody{font-family: Verdana,Arial,Helvetica,sans-serif}\n"

    # Reuses the description and language declared above.
    conversion_options = {
        'comment': description,
        'tags': 'blog, technology, microsoft, programming, C#, Java',
        'publisher': 'Ted Neward',
        'language': language,
    }

View File

@ -0,0 +1,104 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.05'
__date__ = '9, February 2011'
'''
http://www.weblogssl.com/
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class weblogssl(BasicNewsRecipe):
    """Recipe for the Weblogs SL network of blogs (www.weblogssl.com).

    Besides the declarative settings, the class rewrites article URLs from
    the ``www.`` host to the ``m.`` host (``print_version``) and swaps
    embedded YouTube players for a still image (``preprocess_html``).
    """

    __author__ = 'desUBIKado'
    description = u'Weblogs colectivos dedicados a seguir la actualidad sobre tecnologia, entretenimiento, estilos de vida, motor, deportes y economia.'
    title = u'Weblogs SL (Xataka, Genbeta, VidaExtra, Blog de Cine y otros)'
    publisher = 'Weblogs SL'
    category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1.5
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # To skip a blog, comment out its entry by putting a '#' in front of it,
    # e.g.  # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
    # would stop Applesfera from being downloaded. (A trailing comma after
    # the last remaining entry is harmless in Python.)
    feeds = [
        (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
        (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
        (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
        (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
        (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
        (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
        (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
        (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
        (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
        (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
        (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
        (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
        (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
        (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
        (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
        (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
        (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
        (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
        (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
        (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
        (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
        (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
        (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
        (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
        (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
        (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
        (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
        (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
        (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
        (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
        (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
        (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
        (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
        (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
        (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
        (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
        (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
        (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
        (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
        (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id':'infoblock'}),
        dict(name='div', attrs={'class':'post'}),
        dict(name='div', attrs={'id':'blog-comments'})
    ]

    remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]

    def print_version(self, url):
        """Return *url* with its ``http://www.`` prefix replaced by ``http://m.``.

        URLs that do not contain ``http://www.`` are returned unchanged.
        """
        return url.replace('http://www.', 'http://m.')

    preprocess_regexps = [
        # Put a blank line between one reader comment and the next
        (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
    ]

    def preprocess_html(self, soup):
        """Replace embedded YouTube players with a still image of the video.

        Every ``<iframe title="YouTube video player">`` found in *soup* is
        renamed to ``<img>`` and its ``src`` is rewritten from the
        ``http://www.youtube.com/embed/<id>`` form to
        ``http://img.youtube.com/vi/<id>/0.jpg``.

        :param soup: parsed article; must provide ``findAll`` and yield tags
                     supporting ``get``, item assignment and ``name``.
        :return: the same *soup* object, modified in place.
        """
        for video_yt in soup.findAll('iframe', {'title': 'YouTube video player'}):
            source = video_yt.get('src')
            if not source:
                # No source means no thumbnail can be derived; leave the tag
                # as it is rather than failing on the whole article.
                continue
            # Drop the whole query string ("?rel=0" and any other player
            # options) so that only the video id remains in the path.
            source = source.split('?', 1)[0]
            video_yt.name = 'img'
            video_yt['src'] = source.replace(
                'http://www.youtube.com/embed/',
                'http://img.youtube.com/vi/') + '/0.jpg'
        return soup

View File

@ -83,7 +83,7 @@ class ANDROID(USBMS):
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE']
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT']

View File

@ -39,6 +39,13 @@ def asfloat(value):
return 0.0
return float(value)
def isspace(text):
    """Tell whether *text* is empty or consists only of collapsible whitespace.

    Returns True for a false value (``None`` or ``''``). Text containing a
    non-breaking space (U+00A0) is never reported as whitespace; any other
    text is judged by its own ``isspace()`` method.
    """
    if text and u'\xa0' not in text:
        return text.isspace()
    # Either nothing at all (True) or text with a non-breaking space (False).
    return not text
class BlockState(object):
def __init__(self, body):
self.body = body
@ -438,7 +445,7 @@ class MobiMLizer(object):
if elem.text:
if istate.preserve:
text = elem.text
elif len(elem) > 0 and elem.text.isspace():
elif len(elem) > 0 and isspace(elem.text):
text = None
else:
text = COLLAPSE.sub(' ', elem.text)
@ -481,7 +488,7 @@ class MobiMLizer(object):
if child.tail:
if istate.preserve:
tail = child.tail
elif bstate.para is None and child.tail.isspace():
elif bstate.para is None and isspace(child.tail):
tail = None
else:
tail = COLLAPSE.sub(' ', child.tail)

View File

@ -70,7 +70,7 @@ class PML_HTMLizer(object):
'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
'r': ('<div style="text-align: right;">', '</div>'),
't': ('<div style="margin-left: 5%;">', '</div>'),
'T': ('<div style="margin-left: %s;">', '</div>'),
'T': ('<div style="text-indent: %s;">', '</div>'),
'i': ('<span style="font-style: italic;">', '</span>'),
'u': ('<span style="text-decoration: underline;">', '</span>'),
'd': ('<span style="text-decoration: line-through;">', '</span>'),
@ -499,7 +499,13 @@ class PML_HTMLizer(object):
self.toc = []
self.file_name = file_name
indent_state = {'t': False, 'T': False}
# t: Are we in an open \t tag set?
# T: Are we in an open \T?
# st: Did the \t start the line?
# sT: Did the \T start the line?
# et: Did the \t end the line?
indent_state = {'t': False, 'T': False, 'st': False, 'sT': False, 'et': False}
basic_indent = False
adv_indent_val = ''
# Keep track of the number of empty lines
# between paragraphs. When we reach a set number
@ -512,8 +518,26 @@ class PML_HTMLizer(object):
for line in pml.splitlines():
parsed = []
empty = True
basic_indent = indent_state['t']
adv_indent = indent_state['T']
indent_state['T'] = False
# Determine if the \t starts the line or if we are
# in an open \t block.
if line.lstrip().startswith('\\t') or basic_indent:
basic_indent = True
indent_state['st'] = True
else:
indent_state['st'] = False
# Determine if the \T starts the line.
if line.lstrip().startswith('\\T'):
indent_state['sT'] = True
else:
indent_state['sT'] = False
# Determine if the \t ends the line.
if line.rstrip().endswith('\\t'):
indent_state['et'] = True
else:
indent_state['et'] = False
# Must use StringIO, cStringIO does not support unicode
line = StringIO.StringIO(line)
@ -575,13 +599,10 @@ class PML_HTMLizer(object):
empty = False
text = '<hr width="%s" />' % self.code_value(line)
elif c == 't':
indent_state[c] = not indent_state[c]
if indent_state[c]:
basic_indent = True
indent_state['t'] = not indent_state['t']
elif c == 'T':
# Ensure we only store the value on the first T set for the line.
if not indent_state['T']:
adv_indent = True
adv_indent_val = self.code_value(line)
else:
# We detected a T previously on this line.
@ -610,10 +631,23 @@ class PML_HTMLizer(object):
text = self.end_line()
parsed.append(text)
# Basic indent will be set if the \t starts the line or
# if we are in a continuing \t block.
if basic_indent:
parsed.insert(0, self.STATES_TAGS['t'][0])
parsed.append(self.STATES_TAGS['t'][1])
elif adv_indent:
# if the \t started the line and either it ended the line or the \t
# block is still open use a left margin.
if indent_state['st'] and (indent_state['et'] or indent_state['t']):
parsed.insert(0, self.STATES_TAGS['t'][0])
parsed.append(self.STATES_TAGS['t'][1])
# Use a text indent instead of a margin.
# This handles cases such as:
# \tO\tne upon a time...
else:
parsed.insert(0, self.STATES_TAGS['T'][0] % '5%')
parsed.append(self.STATES_TAGS['T'][1])
# \t will override \T's on the line.
# We only handle \T's that started the line.
elif indent_state['T'] and indent_state['sT']:
parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
parsed.append(self.STATES_TAGS['T'][1])
indent_state['T'] = False

View File

@ -227,8 +227,12 @@ class PluginTweaks(QDialog): # {{{
self.highlighter = PythonHighlighter(self.edit.document())
self.l = QVBoxLayout()
self.setLayout(self.l)
self.l.addWidget(QLabel(
_('Add/edit tweaks for any custom plugins you have installed.')))
self.msg = QLabel(
_('Add/edit tweaks for any custom plugins you have installed. '
'Documentation for these tweaks should be available '
'on the website from where you downloaded the plugins.'))
self.msg.setWordWrap(True)
self.l.addWidget(self.msg)
self.l.addWidget(self.edit)
self.edit.setPlainText(raw)
self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,

View File

@ -440,16 +440,17 @@ class Document(QWebPage): # {{{
@property
def height(self):
j = self.javascript('document.body.offsetHeight', 'int')
# Note that document.body.offsetHeight does not include top and bottom
# margins on body and in some cases does not include the top margin on
# the first element inside body either. See ticket #8791 for an example
# of the latter.
q = self.mainFrame().contentsSize().height()
if q == j:
return j
if min(j, q) <= 0:
return max(j, q)
window_height = self.window_height
if j == window_height:
return j if q < 1.2*j else q
return j
if q < 0:
# Don't know if this is still needed, but it can't hurt
j = self.javascript('document.body.offsetHeight', 'int')
if j >= 0:
q = j
return q
@property
def width(self):

View File

@ -561,9 +561,10 @@ format, whether input or output are available in the conversion dialog under the
Convert Microsoft Word documents
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|app| does not directly convert .doc files from Microsoft Word. However, in Word, you can save the document
|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well.
"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well. Note that Word
produces really messy HTML; converting it can take a long time, so be patient.
There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
generating the Table of Contents much simpler. It is called BookCreator and is available for free

File diff suppressed because it is too large Load Diff

View File

@ -8,11 +8,13 @@ import re, htmlentitydefs
_ascii_pat = None
def clean_ascii_chars(txt, charlist=None):
'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
'''
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
This is all control chars except \\t,\\n and \\r
'''
global _ascii_pat
if _ascii_pat is None:
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
+ [0x1A, 0x1B]
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
if charlist is None:

View File

@ -13,6 +13,7 @@ from calibre.web.feeds.feedparser import parse
from calibre.utils.logging import default_log
from calibre import entity_to_unicode, strftime
from calibre.utils.date import dt_factory, utcnow, local_tz
from calibre.utils.cleantext import clean_ascii_chars
class Article(object):
@ -43,7 +44,7 @@ class Article(object):
print summary.encode('utf-8')
traceback.print_exc()
summary = u''
self.text_summary = summary
self.text_summary = clean_ascii_chars(summary)
self.author = author
self.content = content
self.date = published