mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
914ddaae86
@ -5,8 +5,8 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '04 December 2010, desUBIKado'
|
||||
__author__ = 'desUBIKado'
|
||||
__description__ = 'Daily newspaper from Aragon'
|
||||
__version__ = 'v0.05'
|
||||
__date__ = '07, December 2010'
|
||||
__version__ = 'v0.07'
|
||||
__date__ = '06, February 2011'
|
||||
'''
|
||||
elperiodicodearagon.com
|
||||
'''
|
||||
@ -38,22 +38,26 @@ class elperiodicodearagon(BasicNewsRecipe):
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
feeds = [(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
|
||||
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
|
||||
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
|
||||
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
|
||||
(u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
|
||||
(u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
|
||||
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
|
||||
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
|
||||
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
|
||||
(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')]
|
||||
feeds = [
|
||||
(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
|
||||
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
|
||||
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
|
||||
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
|
||||
(u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
|
||||
(u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
|
||||
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
|
||||
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
|
||||
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
|
||||
(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
|
||||
]
|
||||
|
||||
|
||||
extra_css = '''
|
||||
h3{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
dd{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
|
||||
h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
|
||||
h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
|
||||
.columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
|
||||
img{margin-bottom: 0.4em}
|
||||
'''
|
||||
|
||||
remove_attributes = ['height','width']
|
||||
@ -82,6 +86,7 @@ class elperiodicodearagon(BasicNewsRecipe):
|
||||
dict(name='a', attrs={'class':'AvisoComentario'}),
|
||||
dict(name='div', attrs={'class':'CajaAvisoComentario'}),
|
||||
dict(name='div', attrs={'class':'navegaNoticias'}),
|
||||
dict(name='div', attrs={'class':'Mensaje'}),
|
||||
dict(name='div', attrs={'id':'PaginadorDiCom'}),
|
||||
dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
|
||||
dict(name='div', attrs={'id':'CintilloComentario'}),
|
||||
@ -107,3 +112,15 @@ class elperiodicodearagon(BasicNewsRecipe):
|
||||
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
|
||||
]
|
||||
|
||||
# Replace the embedded YouTube video with an image
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Replace embedded YouTube players with a still image of the video.

    Every ``<iframe title="YouTube video player">`` found in ``soup`` is
    renamed to ``<img>`` and its ``src`` is rewritten from
    ``http://www.youtube.com/embed/<id>`` to
    ``http://img.youtube.com/vi/<id>/0.jpg``.

    :param soup: parsed article; only ``findAll`` is called on it, and
                 ``get``/item assignment/``name`` on the tags it returns.
    :return: the same ``soup`` object, modified in place.
    '''
    for video_yt in soup.findAll('iframe', {'title': 'YouTube video player'}):
        fuente = video_yt.get('src')
        if not fuente:
            # Nothing to derive a thumbnail from: leave the tag untouched
            # instead of failing the whole article with a KeyError.
            continue
        # Drop any query string (e.g. "?rel=0") first, otherwise the
        # thumbnail path would be appended after it ("...?rel=0/0.jpg").
        sin_parametros = fuente.split('?', 1)[0]
        miniatura = sin_parametros.replace('http://www.youtube.com/embed/',
                                           'http://img.youtube.com/vi/')
        video_yt.name = 'img'
        video_yt['src'] = miniatura + '/0.jpg'

    return soup
|
||||
|
33
resources/recipes/tedneward.recipe
Normal file
33
resources/recipes/tedneward.recipe
Normal file
@ -0,0 +1,33 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
blogs.tedneward.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class InteroperabilityHappens(BasicNewsRecipe):
    '''Recipe for Ted Neward's blog "Interoperability Happens" (blogs.tedneward.com).'''

    # Descriptive metadata
    __author__ = 'Darko Miletic'
    title = 'Interoperability Happens'
    description = 'Tech blog by Ted Neward'
    language = 'en'
    publication_type = 'blog'

    # Download settings
    encoding = 'utf-8'
    oldest_article = 15
    max_articles_per_feed = 100
    use_embedded_content = True
    no_stylesheets = True

    # Styling applied to the generated book
    extra_css = """
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
    """

    # Reuses the description and language defined above
    conversion_options = {
        'comment': description,
        'tags': 'blog, technology, microsoft, programming, C#, Java',
        'publisher': 'Ted Neward',
        'language': language,
    }

    feeds = [
        (u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss'),
    ]
|
||||
|
104
resources/recipes/weblogs_sl.recipe
Normal file
104
resources/recipes/weblogs_sl.recipe
Normal file
@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '4 February 2011, desUBIKado'
|
||||
__author__ = 'desUBIKado'
|
||||
__version__ = 'v0.05'
|
||||
__date__ = '9, February 2011'
|
||||
'''
|
||||
http://www.weblogssl.com/
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class weblogssl(BasicNewsRecipe):
    '''
    Recipe for the Weblogs SL blog network (Xataka, Genbeta, Vida Extra, ...).

    Each entry in ``feeds`` is one blog of the network. Article URLs are
    rewritten by ``print_version`` and embedded YouTube players are turned
    into still images by ``preprocess_html``.
    '''

    __author__ = 'desUBIKado'
    description = u'Weblogs colectivos dedicados a seguir la actualidad sobre tecnologia, entretenimiento, estilos de vida, motor, deportes y economia.'
    title = u'Weblogs SL (Xataka, Genbeta, VidaExtra, Blog de Cine y otros)'
    publisher = 'Weblogs SL'
    category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1.5
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # To skip a blog, comment out its line by putting a # in front of it,
    # e.g. "# (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),"
    # would stop Applesfera from being downloaded. Every entry ends with a
    # comma, so any line (including the last one) can be disabled safely.

    feeds = [
        (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
        (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
        (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
        (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
        (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
        (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
        (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
        (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
        (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
        (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
        (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
        (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
        (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
        (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
        (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
        (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
        (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
        (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
        (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
        (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
        (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
        (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
        (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
        (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
        (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
        (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
        (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
        (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
        (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
        (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
        (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
        (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
        (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
        (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
        (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
        (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
        (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
        (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
        (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
        (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id':'infoblock'}),
        dict(name='div', attrs={'class':'post'}),
        dict(name='div', attrs={'id':'blog-comments'}),
    ]

    remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]

    def print_version(self, url):
        '''
        Return ``url`` with ``http://www.`` replaced by ``http://m.``.

        Presumably this selects the lighter mobile edition of each blog;
        confirm against the live sites.
        '''
        return url.replace('http://www.', 'http://m.')

    preprocess_regexps = [
        # Put a blank line between one comment and the next
        (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c'),
    ]

    def preprocess_html(self, soup):
        '''
        Replace embedded YouTube players with a still image of the video.

        Every ``<iframe title="YouTube video player">`` found in ``soup`` is
        renamed to ``<img>`` and its ``src`` is rewritten from
        ``http://www.youtube.com/embed/<id>`` to
        ``http://img.youtube.com/vi/<id>/0.jpg``.

        :param soup: parsed article; modified in place.
        :return: the same ``soup`` object.
        '''
        for video_yt in soup.findAll('iframe', {'title': 'YouTube video player'}):
            fuente = video_yt.get('src')
            if not fuente:
                # Nothing to derive a thumbnail from: leave the tag untouched
                # instead of failing the whole article with a KeyError.
                continue
            # Drop the whole query string, not just the literal "?rel=0":
            # any other parameter would otherwise end up in the middle of
            # the thumbnail URL ("...?hd=1/0.jpg").
            sin_parametros = fuente.split('?', 1)[0]
            miniatura = sin_parametros.replace('http://www.youtube.com/embed/',
                                               'http://img.youtube.com/vi/')
            video_yt.name = 'img'
            video_yt['src'] = miniatura + '/0.jpg'

        return soup
|
@ -83,7 +83,7 @@ class ANDROID(USBMS):
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
||||
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE']
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT']
|
||||
|
@ -39,6 +39,13 @@ def asfloat(value):
|
||||
return 0.0
|
||||
return float(value)
|
||||
|
||||
def isspace(text):
    '''
    Return True if ``text`` is empty/None or contains only whitespace.

    Text containing a non-breaking space (U+00A0) is never reported as
    whitespace, even if every other character in it is.
    '''
    if text:
        return u'\xa0' not in text and text.isspace()
    return True
|
||||
|
||||
class BlockState(object):
|
||||
def __init__(self, body):
|
||||
self.body = body
|
||||
@ -438,7 +445,7 @@ class MobiMLizer(object):
|
||||
if elem.text:
|
||||
if istate.preserve:
|
||||
text = elem.text
|
||||
elif len(elem) > 0 and elem.text.isspace():
|
||||
elif len(elem) > 0 and isspace(elem.text):
|
||||
text = None
|
||||
else:
|
||||
text = COLLAPSE.sub(' ', elem.text)
|
||||
@ -481,7 +488,7 @@ class MobiMLizer(object):
|
||||
if child.tail:
|
||||
if istate.preserve:
|
||||
tail = child.tail
|
||||
elif bstate.para is None and child.tail.isspace():
|
||||
elif bstate.para is None and isspace(child.tail):
|
||||
tail = None
|
||||
else:
|
||||
tail = COLLAPSE.sub(' ', child.tail)
|
||||
|
@ -70,7 +70,7 @@ class PML_HTMLizer(object):
|
||||
'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
|
||||
'r': ('<div style="text-align: right;">', '</div>'),
|
||||
't': ('<div style="margin-left: 5%;">', '</div>'),
|
||||
'T': ('<div style="margin-left: %s;">', '</div>'),
|
||||
'T': ('<div style="text-indent: %s;">', '</div>'),
|
||||
'i': ('<span style="font-style: italic;">', '</span>'),
|
||||
'u': ('<span style="text-decoration: underline;">', '</span>'),
|
||||
'd': ('<span style="text-decoration: line-through;">', '</span>'),
|
||||
@ -499,7 +499,13 @@ class PML_HTMLizer(object):
|
||||
self.toc = []
|
||||
self.file_name = file_name
|
||||
|
||||
indent_state = {'t': False, 'T': False}
|
||||
# t: Are we in an open \t tag set?
|
||||
# T: Are we in an open \T?
|
||||
# st: Did the \t start the line?
|
||||
# sT: Did the \T start the line?
|
||||
# et: Did the \t end the line?
|
||||
indent_state = {'t': False, 'T': False, 'st': False, 'sT': False, 'et': False}
|
||||
basic_indent = False
|
||||
adv_indent_val = ''
|
||||
# Keep track of the number of empty lines
|
||||
# between paragraphs. When we reach a set number
|
||||
@ -512,8 +518,26 @@ class PML_HTMLizer(object):
|
||||
for line in pml.splitlines():
|
||||
parsed = []
|
||||
empty = True
|
||||
|
||||
basic_indent = indent_state['t']
|
||||
adv_indent = indent_state['T']
|
||||
indent_state['T'] = False
|
||||
# Determine if the \t starts the line or if we are
|
||||
# in an open \t block.
|
||||
if line.lstrip().startswith('\\t') or basic_indent:
|
||||
basic_indent = True
|
||||
indent_state['st'] = True
|
||||
else:
|
||||
indent_state['st'] = False
|
||||
# Determine if the \T starts the line.
|
||||
if line.lstrip().startswith('\\T'):
|
||||
indent_state['sT'] = True
|
||||
else:
|
||||
indent_state['sT'] = False
|
||||
# Determine if the \t ends the line.
|
||||
if line.rstrip().endswith('\\t'):
|
||||
indent_state['et'] = True
|
||||
else:
|
||||
indent_state['et'] = False
|
||||
|
||||
# Must use StringIO, cStringIO does not support unicode
|
||||
line = StringIO.StringIO(line)
|
||||
@ -575,13 +599,10 @@ class PML_HTMLizer(object):
|
||||
empty = False
|
||||
text = '<hr width="%s" />' % self.code_value(line)
|
||||
elif c == 't':
|
||||
indent_state[c] = not indent_state[c]
|
||||
if indent_state[c]:
|
||||
basic_indent = True
|
||||
indent_state['t'] = not indent_state['t']
|
||||
elif c == 'T':
|
||||
# Ensure we only store the value on the first T set for the line.
|
||||
if not indent_state['T']:
|
||||
adv_indent = True
|
||||
adv_indent_val = self.code_value(line)
|
||||
else:
|
||||
# We detected a T previously on this line.
|
||||
@ -610,10 +631,23 @@ class PML_HTMLizer(object):
|
||||
text = self.end_line()
|
||||
parsed.append(text)
|
||||
|
||||
# Basic indent will be set if the \t starts the line or
|
||||
# if we are in a continuing \t block.
|
||||
if basic_indent:
|
||||
parsed.insert(0, self.STATES_TAGS['t'][0])
|
||||
parsed.append(self.STATES_TAGS['t'][1])
|
||||
elif adv_indent:
|
||||
# if the \t started the line and either it ended the line or the \t
|
||||
# block is still open use a left margin.
|
||||
if indent_state['st'] and (indent_state['et'] or indent_state['t']):
|
||||
parsed.insert(0, self.STATES_TAGS['t'][0])
|
||||
parsed.append(self.STATES_TAGS['t'][1])
|
||||
# Use a text indent instead of a margin.
|
||||
# This handles cases such as:
|
||||
# \tO\tne upon a time...
|
||||
else:
|
||||
parsed.insert(0, self.STATES_TAGS['T'][0] % '5%')
|
||||
parsed.append(self.STATES_TAGS['T'][1])
|
||||
# \t will override \T's on the line.
|
||||
# We only handle \T's that started the line.
|
||||
elif indent_state['T'] and indent_state['sT']:
|
||||
parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
|
||||
parsed.append(self.STATES_TAGS['T'][1])
|
||||
indent_state['T'] = False
|
||||
|
@ -227,8 +227,12 @@ class PluginTweaks(QDialog): # {{{
|
||||
self.highlighter = PythonHighlighter(self.edit.document())
|
||||
self.l = QVBoxLayout()
|
||||
self.setLayout(self.l)
|
||||
self.l.addWidget(QLabel(
|
||||
_('Add/edit tweaks for any custom plugins you have installed.')))
|
||||
self.msg = QLabel(
|
||||
_('Add/edit tweaks for any custom plugins you have installed. '
|
||||
'Documentation for these tweaks should be available '
|
||||
'on the website from where you downloaded the plugins.'))
|
||||
self.msg.setWordWrap(True)
|
||||
self.l.addWidget(self.msg)
|
||||
self.l.addWidget(self.edit)
|
||||
self.edit.setPlainText(raw)
|
||||
self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
|
||||
|
@ -440,16 +440,17 @@ class Document(QWebPage): # {{{
|
||||
|
||||
@property
|
||||
def height(self):
|
||||
j = self.javascript('document.body.offsetHeight', 'int')
|
||||
# Note that document.body.offsetHeight does not include top and bottom
|
||||
# margins on body and in some cases does not include the top margin on
|
||||
# the first element inside body either. See ticket #8791 for an example
|
||||
# of the latter.
|
||||
q = self.mainFrame().contentsSize().height()
|
||||
if q == j:
|
||||
return j
|
||||
if min(j, q) <= 0:
|
||||
return max(j, q)
|
||||
window_height = self.window_height
|
||||
if j == window_height:
|
||||
return j if q < 1.2*j else q
|
||||
return j
|
||||
if q < 0:
|
||||
# Don't know if this is still needed, but it can't hurt
|
||||
j = self.javascript('document.body.offsetHeight', 'int')
|
||||
if j >= 0:
|
||||
q = j
|
||||
return q
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
|
@ -561,9 +561,10 @@ format, whether input or output are available in the conversion dialog under the
|
||||
Convert Microsoft Word documents
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|app| does not directly convert .doc files from Microsoft Word. However, in Word, you can save the document
|
||||
|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
|
||||
as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
|
||||
"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well.
|
||||
"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well. Note that Word
|
||||
produces really messy HTML; converting it can take a long time, so be patient.
|
||||
|
||||
There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
|
||||
generating the Table of Contents much simpler. It is called BookCreator and is available for free
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,11 +8,13 @@ import re, htmlentitydefs
|
||||
_ascii_pat = None
|
||||
|
||||
def clean_ascii_chars(txt, charlist=None):
|
||||
'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
|
||||
'''
|
||||
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
|
||||
This is all control chars except \\t,\\n and \\r
|
||||
'''
|
||||
global _ascii_pat
|
||||
if _ascii_pat is None:
|
||||
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
|
||||
+ [0x1A, 0x1B]
|
||||
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
|
||||
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
|
||||
|
||||
if charlist is None:
|
||||
|
@ -13,6 +13,7 @@ from calibre.web.feeds.feedparser import parse
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre import entity_to_unicode, strftime
|
||||
from calibre.utils.date import dt_factory, utcnow, local_tz
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
|
||||
class Article(object):
|
||||
|
||||
@ -43,7 +44,7 @@ class Article(object):
|
||||
print summary.encode('utf-8')
|
||||
traceback.print_exc()
|
||||
summary = u''
|
||||
self.text_summary = summary
|
||||
self.text_summary = clean_ascii_chars(summary)
|
||||
self.author = author
|
||||
self.content = content
|
||||
self.date = published
|
||||
|
Loading…
x
Reference in New Issue
Block a user