mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
914ddaae86
@ -5,8 +5,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '04 December 2010, desUBIKado'
|
__copyright__ = '04 December 2010, desUBIKado'
|
||||||
__author__ = 'desUBIKado'
|
__author__ = 'desUBIKado'
|
||||||
__description__ = 'Daily newspaper from Aragon'
|
__description__ = 'Daily newspaper from Aragon'
|
||||||
__version__ = 'v0.05'
|
__version__ = 'v0.07'
|
||||||
__date__ = '07, December 2010'
|
__date__ = '06, February 2011'
|
||||||
'''
|
'''
|
||||||
elperiodicodearagon.com
|
elperiodicodearagon.com
|
||||||
'''
|
'''
|
||||||
@ -38,22 +38,26 @@ class elperiodicodearagon(BasicNewsRecipe):
|
|||||||
,'publisher' : publisher
|
,'publisher' : publisher
|
||||||
}
|
}
|
||||||
|
|
||||||
feeds = [(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
|
feeds = [
|
||||||
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
|
(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
|
||||||
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
|
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
|
||||||
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
|
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
|
||||||
(u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
|
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
|
||||||
(u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
|
(u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
|
||||||
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
|
(u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
|
||||||
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
|
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
|
||||||
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
|
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
|
||||||
(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')]
|
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
|
||||||
|
(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h3{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
|
h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
|
||||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
|
||||||
dd{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
|
||||||
|
.columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
|
||||||
|
img{margin-bottom: 0.4em}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
remove_attributes = ['height','width']
|
remove_attributes = ['height','width']
|
||||||
@ -82,6 +86,7 @@ class elperiodicodearagon(BasicNewsRecipe):
|
|||||||
dict(name='a', attrs={'class':'AvisoComentario'}),
|
dict(name='a', attrs={'class':'AvisoComentario'}),
|
||||||
dict(name='div', attrs={'class':'CajaAvisoComentario'}),
|
dict(name='div', attrs={'class':'CajaAvisoComentario'}),
|
||||||
dict(name='div', attrs={'class':'navegaNoticias'}),
|
dict(name='div', attrs={'class':'navegaNoticias'}),
|
||||||
|
dict(name='div', attrs={'class':'Mensaje'}),
|
||||||
dict(name='div', attrs={'id':'PaginadorDiCom'}),
|
dict(name='div', attrs={'id':'PaginadorDiCom'}),
|
||||||
dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
|
dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
|
||||||
dict(name='div', attrs={'id':'CintilloComentario'}),
|
dict(name='div', attrs={'id':'CintilloComentario'}),
|
||||||
@ -107,3 +112,15 @@ class elperiodicodearagon(BasicNewsRecipe):
|
|||||||
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||||
(re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
|
(re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Para sustituir el video incrustado de YouTube por una imagen
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Swap embedded YouTube players for a static thumbnail image.

    Every ``<iframe title="YouTube video player">`` found in *soup* is
    renamed in place to ``<img>`` and its ``src`` is rewritten from the
    ``http://www.youtube.com/embed/<id>`` form to
    ``http://img.youtube.com/vi/<id>/0.jpg``.

    :param soup: parsed article markup; only its ``findAll`` method and
        the attribute access of the returned tags are used here.
    :return: the same *soup* object, modified in place.
    """
    for video_yt in soup.findAll('iframe', {'title': 'YouTube video player'}):
        fuente = video_yt.get('src')
        if not fuente:
            # No source URL means there is nothing to build a thumbnail
            # from; leave the tag untouched instead of raising KeyError.
            continue
        # Player options (e.g. "?rel=0") follow the video id. They must be
        # dropped, otherwise the '/0.jpg' suffix lands inside the query
        # string and the image URL is broken.
        fuente = fuente.split('?', 1)[0]
        fuente2 = fuente.replace('http://www.youtube.com/embed/',
                                 'http://img.youtube.com/vi/')
        video_yt.name = 'img'
        video_yt['src'] = fuente2 + '/0.jpg'
    return soup
|
||||||
|
33
resources/recipes/tedneward.recipe
Normal file
33
resources/recipes/tedneward.recipe
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
blogs.tedneward.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class InteroperabilityHappens(BasicNewsRecipe):
    """Recipe for the 'Interoperability Happens' blog (blogs.tedneward.com)."""

    # Identification
    title = 'Interoperability Happens'
    description = 'Tech blog by Ted Neward'
    __author__ = 'Darko Miletic'
    publication_type = 'blog'
    language = 'en'

    # Fetching: the feed already carries the full post text
    use_embedded_content = True
    oldest_article = 15
    max_articles_per_feed = 100
    encoding = 'utf-8'
    feeds = [
        (u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss'),
    ]

    # Presentation
    no_stylesheets = True
    extra_css = """
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
    """

    # Metadata handed to the conversion step; reuses the values above
    conversion_options = {
        'comment': description,
        'tags': 'blog, technology, microsoft, programming, C#, Java',
        'publisher': 'Ted Neward',
        'language': language,
    }
|
||||||
|
|
104
resources/recipes/weblogs_sl.recipe
Normal file
104
resources/recipes/weblogs_sl.recipe
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '4 February 2011, desUBIKado'
|
||||||
|
__author__ = 'desUBIKado'
|
||||||
|
__version__ = 'v0.05'
|
||||||
|
__date__ = '9, February 2011'
|
||||||
|
'''
|
||||||
|
http://www.weblogssl.com/
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class weblogssl(BasicNewsRecipe):
    """Recipe that collects the Weblogs SL blogs served from feeds.weblogssl.com.

    Articles are fetched through ``print_version`` URLs, reduced to the
    info block, the post and its comments, and embedded YouTube players are
    replaced by thumbnail images.
    """

    __author__ = 'desUBIKado'
    description = u'Weblogs colectivos dedicados a seguir la actualidad sobre tecnologia, entretenimiento, estilos de vida, motor, deportes y economia.'
    title = u'Weblogs SL (Xataka, Genbeta, VidaExtra, Blog de Cine y otros)'
    publisher = 'Weblogs SL'
    category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1.5
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # To skip a blog, comment out its entry with a leading '#'. For example
    #   # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
    # stops Applesfera from being downloaded. (A trailing comma after the
    # last remaining entry is harmless in Python.)

    feeds = [
        (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
        (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
        (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
        (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
        (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
        (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
        (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
        (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
        (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
        (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
        (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
        (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
        (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
        (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
        (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
        (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
        (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
        (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
        (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
        (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
        (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
        (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
        (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
        (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
        (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
        (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
        (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
        (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
        (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
        (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
        (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
        (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
        (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
        (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
        (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
        (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
        (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
        (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
        (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
        (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
    ]

    keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
                      dict(name='div', attrs={'class':'post'}),
                      dict(name='div', attrs={'id':'blog-comments'})
                     ]

    remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]

    def print_version(self, url):
        """Return *url* with every 'http://www.' replaced by 'http://m.'.

        NOTE(review): presumably this selects the mobile edition of the
        article page -- confirm against the site.
        """
        return url.replace('http://www.', 'http://m.')

    preprocess_regexps = [
        # Insert a blank line between one comment and the next
        (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
    ]

    def preprocess_html(self, soup):
        """Swap embedded YouTube players for a static thumbnail image.

        Every ``<iframe title="YouTube video player">`` found in *soup* is
        renamed in place to ``<img>`` and its ``src`` is rewritten from the
        ``http://www.youtube.com/embed/<id>`` form to
        ``http://img.youtube.com/vi/<id>/0.jpg``.

        :param soup: parsed article markup; only its ``findAll`` method and
            the attribute access of the returned tags are used here.
        :return: the same *soup* object, modified in place.
        """
        for video_yt in soup.findAll('iframe', {'title': 'YouTube video player'}):
            fuente = video_yt.get('src')
            if not fuente:
                # No source URL means there is nothing to build a thumbnail
                # from; leave the tag untouched instead of raising KeyError.
                continue
            # Drop any player options after the video id, not only the
            # literal "?rel=0"; otherwise the '/0.jpg' suffix lands inside
            # the query string and the image URL is broken.
            fuente = fuente.split('?', 1)[0]
            fuente2 = fuente.replace('http://www.youtube.com/embed/',
                                     'http://img.youtube.com/vi/')
            video_yt.name = 'img'
            video_yt['src'] = fuente2 + '/0.jpg'
        return soup
|
@ -83,7 +83,7 @@ class ANDROID(USBMS):
|
|||||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||||
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
||||||
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
||||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE']
|
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']
|
||||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||||
'A70S', 'A101IT']
|
'A70S', 'A101IT']
|
||||||
|
@ -39,6 +39,13 @@ def asfloat(value):
|
|||||||
return 0.0
|
return 0.0
|
||||||
return float(value)
|
return float(value)
|
||||||
|
|
||||||
|
def isspace(text):
    """Report whether *text* is ignorable whitespace.

    Empty or ``None`` text counts as whitespace. Text containing a
    non-breaking space (U+00A0) never does, so it is not discarded by
    callers that drop whitespace-only text.
    """
    if text and u'\xa0' not in text:
        return text.isspace()
    # Falsy text -> True; text holding a non-breaking space -> False.
    return not text
|
||||||
|
|
||||||
class BlockState(object):
|
class BlockState(object):
|
||||||
def __init__(self, body):
|
def __init__(self, body):
|
||||||
self.body = body
|
self.body = body
|
||||||
@ -438,7 +445,7 @@ class MobiMLizer(object):
|
|||||||
if elem.text:
|
if elem.text:
|
||||||
if istate.preserve:
|
if istate.preserve:
|
||||||
text = elem.text
|
text = elem.text
|
||||||
elif len(elem) > 0 and elem.text.isspace():
|
elif len(elem) > 0 and isspace(elem.text):
|
||||||
text = None
|
text = None
|
||||||
else:
|
else:
|
||||||
text = COLLAPSE.sub(' ', elem.text)
|
text = COLLAPSE.sub(' ', elem.text)
|
||||||
@ -481,7 +488,7 @@ class MobiMLizer(object):
|
|||||||
if child.tail:
|
if child.tail:
|
||||||
if istate.preserve:
|
if istate.preserve:
|
||||||
tail = child.tail
|
tail = child.tail
|
||||||
elif bstate.para is None and child.tail.isspace():
|
elif bstate.para is None and isspace(child.tail):
|
||||||
tail = None
|
tail = None
|
||||||
else:
|
else:
|
||||||
tail = COLLAPSE.sub(' ', child.tail)
|
tail = COLLAPSE.sub(' ', child.tail)
|
||||||
|
@ -70,7 +70,7 @@ class PML_HTMLizer(object):
|
|||||||
'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
|
'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
|
||||||
'r': ('<div style="text-align: right;">', '</div>'),
|
'r': ('<div style="text-align: right;">', '</div>'),
|
||||||
't': ('<div style="margin-left: 5%;">', '</div>'),
|
't': ('<div style="margin-left: 5%;">', '</div>'),
|
||||||
'T': ('<div style="margin-left: %s;">', '</div>'),
|
'T': ('<div style="text-indent: %s;">', '</div>'),
|
||||||
'i': ('<span style="font-style: italic;">', '</span>'),
|
'i': ('<span style="font-style: italic;">', '</span>'),
|
||||||
'u': ('<span style="text-decoration: underline;">', '</span>'),
|
'u': ('<span style="text-decoration: underline;">', '</span>'),
|
||||||
'd': ('<span style="text-decoration: line-through;">', '</span>'),
|
'd': ('<span style="text-decoration: line-through;">', '</span>'),
|
||||||
@ -499,7 +499,13 @@ class PML_HTMLizer(object):
|
|||||||
self.toc = []
|
self.toc = []
|
||||||
self.file_name = file_name
|
self.file_name = file_name
|
||||||
|
|
||||||
indent_state = {'t': False, 'T': False}
|
# t: Are we in an open \t tag set?
|
||||||
|
# T: Are we in an open \T?
|
||||||
|
# st: Did the \t start the line?
|
||||||
|
# sT: Did the \T start the line?
|
||||||
|
# et: Did the \t end the line?
|
||||||
|
indent_state = {'t': False, 'T': False, 'st': False, 'sT': False, 'et': False}
|
||||||
|
basic_indent = False
|
||||||
adv_indent_val = ''
|
adv_indent_val = ''
|
||||||
# Keep track of the number of empty lines
|
# Keep track of the number of empty lines
|
||||||
# between paragraphs. When we reach a set number
|
# between paragraphs. When we reach a set number
|
||||||
@ -512,8 +518,26 @@ class PML_HTMLizer(object):
|
|||||||
for line in pml.splitlines():
|
for line in pml.splitlines():
|
||||||
parsed = []
|
parsed = []
|
||||||
empty = True
|
empty = True
|
||||||
|
|
||||||
basic_indent = indent_state['t']
|
basic_indent = indent_state['t']
|
||||||
adv_indent = indent_state['T']
|
indent_state['T'] = False
|
||||||
|
# Determine if the \t starts the line or if we are
|
||||||
|
# in an open \t block.
|
||||||
|
if line.lstrip().startswith('\\t') or basic_indent:
|
||||||
|
basic_indent = True
|
||||||
|
indent_state['st'] = True
|
||||||
|
else:
|
||||||
|
indent_state['st'] = False
|
||||||
|
# Determine if the \T starts the line.
|
||||||
|
if line.lstrip().startswith('\\T'):
|
||||||
|
indent_state['sT'] = True
|
||||||
|
else:
|
||||||
|
indent_state['sT'] = False
|
||||||
|
# Determine if the \t ends the line.
|
||||||
|
if line.rstrip().endswith('\\t'):
|
||||||
|
indent_state['et'] = True
|
||||||
|
else:
|
||||||
|
indent_state['et'] = False
|
||||||
|
|
||||||
# Must use StringIO, cStringIO does not support unicode
|
# Must use StringIO, cStringIO does not support unicode
|
||||||
line = StringIO.StringIO(line)
|
line = StringIO.StringIO(line)
|
||||||
@ -575,13 +599,10 @@ class PML_HTMLizer(object):
|
|||||||
empty = False
|
empty = False
|
||||||
text = '<hr width="%s" />' % self.code_value(line)
|
text = '<hr width="%s" />' % self.code_value(line)
|
||||||
elif c == 't':
|
elif c == 't':
|
||||||
indent_state[c] = not indent_state[c]
|
indent_state['t'] = not indent_state['t']
|
||||||
if indent_state[c]:
|
|
||||||
basic_indent = True
|
|
||||||
elif c == 'T':
|
elif c == 'T':
|
||||||
# Ensure we only store the value on the first T set for the line.
|
# Ensure we only store the value on the first T set for the line.
|
||||||
if not indent_state['T']:
|
if not indent_state['T']:
|
||||||
adv_indent = True
|
|
||||||
adv_indent_val = self.code_value(line)
|
adv_indent_val = self.code_value(line)
|
||||||
else:
|
else:
|
||||||
# We detected a T previously on this line.
|
# We detected a T previously on this line.
|
||||||
@ -610,10 +631,23 @@ class PML_HTMLizer(object):
|
|||||||
text = self.end_line()
|
text = self.end_line()
|
||||||
parsed.append(text)
|
parsed.append(text)
|
||||||
|
|
||||||
|
# Basic indent will be set if the \t starts the line or
|
||||||
|
# if we are in a continuing \t block.
|
||||||
if basic_indent:
|
if basic_indent:
|
||||||
parsed.insert(0, self.STATES_TAGS['t'][0])
|
# if the \t started the line and either it ended the line or the \t
|
||||||
parsed.append(self.STATES_TAGS['t'][1])
|
# block is still open use a left margin.
|
||||||
elif adv_indent:
|
if indent_state['st'] and (indent_state['et'] or indent_state['t']):
|
||||||
|
parsed.insert(0, self.STATES_TAGS['t'][0])
|
||||||
|
parsed.append(self.STATES_TAGS['t'][1])
|
||||||
|
# Use a text indent instead of a margin.
|
||||||
|
# This handles cases such as:
|
||||||
|
# \tO\tne upon a time...
|
||||||
|
else:
|
||||||
|
parsed.insert(0, self.STATES_TAGS['T'][0] % '5%')
|
||||||
|
parsed.append(self.STATES_TAGS['T'][1])
|
||||||
|
# \t will override \T's on the line.
|
||||||
|
# We only handle \T's that started the line.
|
||||||
|
elif indent_state['T'] and indent_state['sT']:
|
||||||
parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
|
parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
|
||||||
parsed.append(self.STATES_TAGS['T'][1])
|
parsed.append(self.STATES_TAGS['T'][1])
|
||||||
indent_state['T'] = False
|
indent_state['T'] = False
|
||||||
|
@ -227,8 +227,12 @@ class PluginTweaks(QDialog): # {{{
|
|||||||
self.highlighter = PythonHighlighter(self.edit.document())
|
self.highlighter = PythonHighlighter(self.edit.document())
|
||||||
self.l = QVBoxLayout()
|
self.l = QVBoxLayout()
|
||||||
self.setLayout(self.l)
|
self.setLayout(self.l)
|
||||||
self.l.addWidget(QLabel(
|
self.msg = QLabel(
|
||||||
_('Add/edit tweaks for any custom plugins you have installed.')))
|
_('Add/edit tweaks for any custom plugins you have installed. '
|
||||||
|
'Documentation for these tweaks should be available '
|
||||||
|
'on the website from where you downloaded the plugins.'))
|
||||||
|
self.msg.setWordWrap(True)
|
||||||
|
self.l.addWidget(self.msg)
|
||||||
self.l.addWidget(self.edit)
|
self.l.addWidget(self.edit)
|
||||||
self.edit.setPlainText(raw)
|
self.edit.setPlainText(raw)
|
||||||
self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
|
self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
|
||||||
|
@ -440,16 +440,17 @@ class Document(QWebPage): # {{{
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def height(self):
|
def height(self):
|
||||||
j = self.javascript('document.body.offsetHeight', 'int')
|
# Note that document.body.offsetHeight does not include top and bottom
|
||||||
|
# margins on body and in some cases does not include the top margin on
|
||||||
|
# the first element inside body either. See ticket #8791 for an example
|
||||||
|
# of the latter.
|
||||||
q = self.mainFrame().contentsSize().height()
|
q = self.mainFrame().contentsSize().height()
|
||||||
if q == j:
|
if q < 0:
|
||||||
return j
|
# Don't know if this is still needed, but it can't hurt
|
||||||
if min(j, q) <= 0:
|
j = self.javascript('document.body.offsetHeight', 'int')
|
||||||
return max(j, q)
|
if j >= 0:
|
||||||
window_height = self.window_height
|
q = j
|
||||||
if j == window_height:
|
return q
|
||||||
return j if q < 1.2*j else q
|
|
||||||
return j
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def width(self):
|
def width(self):
|
||||||
|
@ -561,9 +561,10 @@ format, whether input or output are available in the conversion dialog under the
|
|||||||
Convert Microsoft Word documents
|
Convert Microsoft Word documents
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|app| does not directly convert .doc files from Microsoft Word. However, in Word, you can save the document
|
|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
|
||||||
as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
|
as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
|
||||||
"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well.
|
"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well. Note that Word
|
||||||
|
produces really messy HTML, so converting it can take a long time; please be patient.
|
||||||
|
|
||||||
There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
|
There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
|
||||||
generating the Table of Contents much simpler. It is called BookCreator and is available for free
|
generating the Table of Contents much simpler. It is called BookCreator and is available for free
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -8,11 +8,13 @@ import re, htmlentitydefs
|
|||||||
_ascii_pat = None
|
_ascii_pat = None
|
||||||
|
|
||||||
def clean_ascii_chars(txt, charlist=None):
|
def clean_ascii_chars(txt, charlist=None):
|
||||||
'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
|
'''
|
||||||
|
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
|
||||||
|
This is all control chars except \\t,\\n and \\r
|
||||||
|
'''
|
||||||
global _ascii_pat
|
global _ascii_pat
|
||||||
if _ascii_pat is None:
|
if _ascii_pat is None:
|
||||||
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
|
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
|
||||||
+ [0x1A, 0x1B]
|
|
||||||
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
|
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
|
||||||
|
|
||||||
if charlist is None:
|
if charlist is None:
|
||||||
|
@ -13,6 +13,7 @@ from calibre.web.feeds.feedparser import parse
|
|||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre import entity_to_unicode, strftime
|
from calibre import entity_to_unicode, strftime
|
||||||
from calibre.utils.date import dt_factory, utcnow, local_tz
|
from calibre.utils.date import dt_factory, utcnow, local_tz
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
|
||||||
class Article(object):
|
class Article(object):
|
||||||
|
|
||||||
@ -43,7 +44,7 @@ class Article(object):
|
|||||||
print summary.encode('utf-8')
|
print summary.encode('utf-8')
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
summary = u''
|
summary = u''
|
||||||
self.text_summary = summary
|
self.text_summary = clean_ascii_chars(summary)
|
||||||
self.author = author
|
self.author = author
|
||||||
self.content = content
|
self.content = content
|
||||||
self.date = published
|
self.date = published
|
||||||
|
Loading…
x
Reference in New Issue
Block a user