Sync to trunk.

This commit is contained in:
John Schember 2009-07-06 19:14:58 -04:00
commit 0e8cc35f18
48 changed files with 22137 additions and 20673 deletions

View File

@ -250,6 +250,19 @@ class KindleDXOutput(OutputProfile):
def tags_to_string(cls, tags):
return 'ttt '.join(tags)+'ttt '
class IlliadOutput(OutputProfile):
    # Output profile for the Irex Illiad (see the description string below).

    # Identification: display name, short identifier, translated description.
    name = 'Illiad'
    short_name = 'illiad'
    description = _('This profile is intended for the Irex Illiad.')

    # Display geometry; comics use exactly the same size as regular pages.
    # NOTE(review): units are presumably pixels -- confirm against OutputProfile.
    screen_size = (760, 925)
    comic_screen_size = screen_size
    dpi = 160.0

    # Base font size followed by the list of available font sizes.
    fbase = 12
    fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,

View File

@ -17,6 +17,34 @@ from calibre.customize.conversion import OptionRecommendation
from lxml import etree
# Tag names (without namespace) that are treated as block-level elements.
# Kept as a tuple; it is used for membership tests on bare tag names.
block_level_tags = (
    # Containers and document structure
    'address', 'body', 'blockquote', 'center', 'dir', 'div', 'dl',
    'fieldset', 'form',
    # Headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    # Everything else
    'hr', 'isindex', 'menu', 'noframes', 'noscript', 'ol', 'p', 'pre',
    'table', 'ul',
)
class EPUBOutput(OutputFormatPlugin):
@ -197,8 +225,6 @@ class EPUBOutput(OutputFormatPlugin):
if 'titlepage' in self.oeb.guide.refs:
self.oeb.guide.refs['titlepage'].href = item.href
def condense_ncx(self, ncx_path):
if not self.opts.pretty_print:
tree = etree.parse(ncx_path)
@ -210,46 +236,46 @@ class EPUBOutput(OutputFormatPlugin):
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
open(ncx_path, 'wb').write(compressed)
def workaround_ade_quirks(self):
'''
Perform various markup transforms to get the output to render correctly
in the quirky ADE.
'''
from calibre.ebooks.oeb.base import XPNSMAP, XHTML, OEB_STYLES
from lxml.etree import XPath as _XPath
from functools import partial
XPath = partial(_XPath, namespaces=XPNSMAP)
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
for x in self.oeb.spine:
root = x.data
body = XPath('//h:body')(root)
if body:
body = body[0]
# Replace <br> that are children of <body> as ADE doesn't handle them
if hasattr(body, 'xpath'):
for br in XPath('./h:br')(body):
if br.getparent() is None:
continue
try:
sibling = br.itersiblings().next()
prior = br.itersiblings(preceding=True).next()
priortag = barename(prior.tag)
priortext = prior.tail
except:
sibling = None
priortag = 'body'
priortext = body.text
if priortext:
priortext = priortext.strip()
br.tag = XHTML('p')
br.text = u'\u00a0'
if (br.tail and br.tail.strip()) or sibling is None or \
getattr(sibling, 'tag', '') != XHTML('br'):
style = br.get('style', '').split(';')
style = filter(None, map(lambda x: x.strip(), style))
style.append('margin: 0pt; border:0pt; height:0pt')
br.set('style', '; '.join(style))
style.append('margin:0pt; border:0pt')
# If the prior tag is a block (including a <br> we replaced)
# then this <br> replacement should have a 1-line height.
# Otherwise it should have no height.
if not priortext and priortag in block_level_tags:
style.append('height:1em')
else:
sibling.getparent().remove(sibling)
if sibling.tail:
if not br.tail:
br.tail = ''
br.tail += sibling.tail
style.append('height:0pt')
br.set('style', '; '.join(style))
for tag in XPath('//h:embed')(root):
tag.getparent().remove(tag)

View File

@ -218,7 +218,7 @@ class MetaInformation(object):
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
'manifest', 'spine', 'toc', 'cover', 'language',
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
'pubdate'):
'pubdate', 'rights', 'publication_type'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
@ -243,7 +243,8 @@ class MetaInformation(object):
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
'series', 'series_index', 'rating', 'isbn', 'language',
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
'rights', 'publication_type',
):
setattr(self, x, getattr(mi, x, None))
@ -262,7 +263,8 @@ class MetaInformation(object):
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer',
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'):
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
'publication_type'):
if hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
@ -332,6 +334,8 @@ class MetaInformation(object):
fmt('Timestamp', self.timestamp.isoformat(' '))
if self.pubdate is not None:
fmt('Published', self.pubdate.isoformat(' '))
if self.rights is not None:
fmt('Rights', unicode(self.rights))
if self.lccn:
fmt('LCCN', unicode(self.lccn))
if self.lcc:
@ -362,6 +366,8 @@ class MetaInformation(object):
ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
if self.pubdate is not None:
ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
# 'rights' is free text, not a date/time object, so it has no isoformat();
# render it directly, the same way the plain-text listing does.
if self.rights is not None:
    ans += [(_('Rights'), unicode(self.rights))]
for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans)

View File

@ -7,6 +7,7 @@ import re
xml:lang="en"
xmlns="http://www.daisy.org/z3986/2005/ncx/"
xmlns:py="http://genshi.edgewall.org/"
xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
>
<head>
<meta name="dtb:uid" content="${uid}"/>
@ -23,6 +24,8 @@ import re
${'%*s'%(4*level,'')}<text>${re.sub(r'\s+', ' ', np.text)}</text>
${'%*s'%(4*level,'')}</navLabel>
${'%*s'%(4*level,'')}<content src="${unicode(np.href)+(('#' + unicode(np.fragment)) if np.fragment else '')}" />
${'%*s'%(4*level,'')}<calibre:meta py:if="np.author" name="author">${np.author}</calibre:meta>
${'%*s'%(4*level,'')}<calibre:meta py:if="np.description" name="description">${np.description}</calibre:meta>
<py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
${'%*s'%(4*level,'')}</navPoint>
</py:def>

View File

@ -19,6 +19,7 @@
<meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
<meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>
<meta py:if="mi.timestamp is not None" name="calibre:timestamp" content="${mi.timestamp.isoformat()}"/>
<meta py:if="mi.publication_type is not None" name="calibre:publication_type" content="${mi.publication_type}" />
<py:for each="tag in mi.tags">
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
</py:for>

View File

@ -440,10 +440,12 @@ class OPF(object):
language = MetadataField('language')
comments = MetadataField('description')
category = MetadataField('category')
rights = MetadataField('rights')
series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
rating = MetadataField('rating', is_dc=False, formatter=int)
pubdate = MetadataField('date', formatter=parser.parse)
publication_type = MetadataField('publication_type', is_dc=False)
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)

View File

@ -21,7 +21,8 @@ class NCXSoup(BeautifulStoneSoup):
class TOC(list):
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
base_path=os.getcwd(), type='unknown'):
base_path=os.getcwd(), type='unknown', author=None,
description=None):
self.href = href
self.fragment = fragment
if not self.fragment:
@ -31,6 +32,8 @@ class TOC(list):
self.base_path = base_path
self.play_order = play_order
self.type = type
self.author = author
self.description = description
def __str__(self):
lines = ['TOC: %s#%s'%(self.href, self.fragment)]
@ -59,11 +62,13 @@ class TOC(list):
list.remove(self, entry)
entry.parent = None
def add_item(self, href, fragment, text, play_order=None, type='unknown'):
def add_item(self, href, fragment, text, play_order=None, type='unknown',
author=None, description=None):
if play_order is None:
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
base_path=self.base_path, play_order=play_order, type=type))
base_path=self.base_path, play_order=play_order,
type=type, author=author, description=description))
return self[-1]
def top_level_items(self):

View File

@ -6,6 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from cStringIO import StringIO
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
@ -31,16 +32,80 @@ class MOBIOutput(OutputFormatPlugin):
OptionRecommendation(name='toc_title', recommended_value=None,
help=_('Title for any generated in-line table of contents.')
),
OptionRecommendation(name='mobi_periodical',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Generate a periodical rather than a book.')
),
OptionRecommendation(name='dont_compress',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Disable compression of the file contents.')
),
])
def check_for_periodical(self):
    '''
    Set ``self.opts.mobi_periodical`` according to the book's metadata.

    The book counts as a periodical when its ``publication_type`` metadata
    is non-empty and the first entry starts with ``'periodical:'``. In that
    case the TOC is periodicalized and a masthead is ensured before the
    option is switched on; otherwise the option is switched off.
    '''
    pub_type = self.oeb.metadata.publication_type
    is_periodical = bool(pub_type) and \
        unicode(pub_type[0]).startswith('periodical:')
    if is_periodical:
        self.periodicalize_toc()
        self.check_for_masthead()
    self.opts.mobi_periodical = is_periodical
def check_for_masthead(self):
    '''
    Make sure the book's guide contains a ``'masthead'`` entry.

    If the guide already has one, nothing is changed. Otherwise the
    ``'calibre.png'`` server resource is re-encoded as a GIF, added to the
    manifest as ``image/gif`` and registered in the guide as the masthead.
    '''
    # Compare the loop variable itself; testing the builtin ``type`` against
    # a string is always False and would regenerate the masthead every time.
    if any(typ == 'masthead' for typ in self.oeb.guide):
        return

    self.oeb.log.debug('No masthead found, generating default one...')
    from calibre.resources import server_resources
    try:
        from PIL import Image as PILImage
        PILImage  # referenced so the import is not reported as unused
    except ImportError:
        import Image as PILImage

    # Re-encode the PNG resource as GIF entirely in memory.
    raw = StringIO(server_resources['calibre.png'])
    im = PILImage.open(raw)
    of = StringIO()
    im.save(of, 'GIF')
    raw = of.getvalue()

    # ``id_`` rather than ``id`` so the builtin is not shadowed.
    id_, href = self.oeb.manifest.generate('masthead', 'masthead')
    self.oeb.manifest.add(id_, href, 'image/gif', data=raw)
    self.oeb.guide.add('masthead', 'Masthead Image', href)
def periodicalize_toc(self):
# Rebuild self.oeb.toc as a periodical -> section -> article tree, marking
# each node through its ``klass`` attribute.
# NOTE(review): indentation is not visible in this view; the nesting of the
# statements below (in particular how much sits under the first ``if``)
# should be confirmed against the real file.
from calibre.ebooks.oeb.base import TOC
toc = self.oeb.toc
# Presumably a no-op when the TOC is empty or already starts with a
# 'periodical' node.
if toc and toc[0].klass != 'periodical':
self.log('Converting TOC for MOBI periodical indexing...')
# Maps id(section) -> list of article nodes detached from that section.
articles = {}
if toc.depth < 3:
# Shallow TOC: put every existing entry under one new 'section' node.
sections = [TOC(klass='section')]
for x in toc:
sections[0].append(x)
else:
# Deeper TOC: the existing top-level entries become the sections.
sections = list(toc)
for x in sections:
x.klass = 'section'
for sec in sections:
articles[id(sec)] = []
# Iterate over a copy because nodes are removed from ``sec`` in the loop.
for a in list(sec):
a.klass = 'article'
articles[id(sec)].append(a)
sec.nodes.remove(a)
# New root node, titled with the first title entry of the book metadata.
root = TOC(klass='periodical',
title=unicode(self.oeb.metadata.title[0]))
for s in sections:
# Sections that had articles get them re-attached and are added to root.
if articles[id(s)]:
for a in articles[id(s)]:
s.nodes.append(a)
root.nodes.append(s)
# Empty the old top level (again via a copy) and install the new root.
for x in list(toc.nodes):
toc.nodes.remove(x)
toc.nodes.append(root)
def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb
from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
@ -60,6 +125,7 @@ class MOBIOutput(OutputFormatPlugin):
rasterizer(oeb, opts)
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
writer = MobiWriter(opts, imagemax=imagemax,
compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,

File diff suppressed because it is too large Load Diff

View File

@ -47,7 +47,7 @@ XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS,
'mbp': MBP_NS }
'mbp': MBP_NS, 'calibre': CALIBRE_NS }
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
@ -455,7 +455,8 @@ class Metadata(object):
'description', 'format', 'identifier', 'language',
'publisher', 'relation', 'rights', 'source',
'subject', 'title', 'type'])
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp'])
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
'publication_type'])
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
'scheme': OPF('scheme'), 'event': OPF('event'),
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}

View File

@ -192,6 +192,7 @@ class OEBReader(object):
if not scheme and href not in known:
new.add(href)
unchecked.clear()
warned = set([])
for href in new:
known.add(href)
is_invalid = False
@ -202,9 +203,13 @@ class OEBReader(object):
if is_invalid:
continue
if not self.oeb.container.exists(href):
if href not in warned:
self.logger.warn('Referenced file %r not found' % href)
warned.add(href)
continue
if href not in warned:
self.logger.warn('Referenced file %r not in manifest' % href)
warned.add(href)
id, _ = manifest.generate(id='added')
guessed = guess_type(href)[0]
media_type = guessed or BINARY_MIME
@ -330,14 +335,14 @@ class OEBReader(object):
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
authorElement = xpath(child,
'descendant::mbp:meta[@name = "author"]')
'descendant::calibre:meta[@name = "author"]')
if authorElement :
author = authorElement[0].text
else :
author = None
descriptionElement = xpath(child,
'descendant::mbp:meta[@name = "description"]')
'descendant::calibre:meta[@name = "description"]')
if descriptionElement :
description = descriptionElement[0].text
else :

View File

@ -63,11 +63,16 @@ def meta_info_to_oeb_metadata(mi, m, log):
if mi.timestamp is not None:
m.clear('timestamp')
m.add('timestamp', mi.timestamp.isoformat())
if mi.rights is not None:
m.clear('rights')
m.add('rights', mi.rights)
if mi.publication_type is not None:
m.clear('publication_type')
m.add('publication_type', mi.publication_type)
if not m.timestamp:
m.add('timestamp', datetime.utcnow().isoformat())
class MergeMetadata(object):
'Merge in user metadata, including cover'

View File

@ -13,6 +13,7 @@ from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
from calibre.ebooks import ConversionError
def XPath(x):
try:
return etree.XPath(x, namespaces=XPNSMAP)

View File

@ -64,8 +64,5 @@ class ManifestTrimmer(object):
unchecked = new
for item in oeb.manifest.values():
if item not in used:
if getattr(self.opts, 'mobi_periodical', False) and \
item.href == 'images/mastheadImage.gif':
continue
oeb.logger.info('Trimming %r from manifest' % item.href)
oeb.manifest.remove(item)

View File

@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, 'mobi_output',
['prefer_author_sort', 'rescale_images', 'toc_title',
'dont_compress', 'mobi_periodical']
'dont_compress',]
)
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -41,7 +41,7 @@
</property>
</widget>
</item>
<item row="6" column="0">
<item row="5" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -61,13 +61,6 @@
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="opt_mobi_periodical">
<property name="text">
<string>Generate a periodical rather than a book</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">

View File

@ -16,6 +16,7 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, QString, \
from calibre import strftime
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.pyparsing import ParseException
from calibre.library.database2 import FIELD_MAP
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog
@ -267,7 +268,11 @@ class BooksModel(QAbstractTableModel):
self.count_changed()
def search(self, text, refinement, reset=True):
try:
self.db.search(text)
except ParseException:
self.emit(SIGNAL('parse_exception()'))
return
self.last_search = text
if reset:
self.clear_caches()
@ -898,7 +903,12 @@ class DeviceBooksModel(BooksModel):
if not text or not text.strip():
self.map = list(range(len(self.db)))
else:
try:
matches = self.search_engine.parse(text)
except ParseException:
self.emit(SIGNAL('parse_exception()'))
return
self.map = []
for i in range(len(self.db)):
if i in matches:

View File

@ -221,9 +221,20 @@ class LibraryServer(object):
def get_format(self, id, format):
format = format.upper()
fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
fmt = self.db.format(id, format, index_is_id=True, as_file=True,
mode='r+b')
if fmt is None:
raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
if format == 'EPUB':
from tempfile import TemporaryFile
from calibre.ebooks.metadata.meta import set_metadata
raw = fmt.read()
fmt = TemporaryFile()
fmt.write(raw)
fmt.seek(0)
set_metadata(fmt, self.db.get_metadata(id, index_is_id=True),
'epub')
fmt.seek(0)
mt = guess_type('dummy.'+format.lower())[0]
if mt is None:
mt = 'application/octet-stream'

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -64,6 +64,10 @@ class BasicNewsRecipe(Recipe):
#: Delay between consecutive downloads in seconds
delay = 0
#: Publication type.
#: Set to ``'newspaper'``, ``'magazine'`` or ``'blog'``; defaults to ``'unknown'``.
publication_type = 'unknown'
#: Number of simultaneous downloads. Set to 1 if the server is picky.
#: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0
simultaneous_downloads = 5
@ -848,6 +852,7 @@ class BasicNewsRecipe(Recipe):
mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
mi.publication_type = 'periodical:'+self.publication_type
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
@ -878,13 +883,16 @@ class BasicNewsRecipe(Recipe):
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
desc = a.text_summary
if not desc:
desc = None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po)
play_order=po, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -915,7 +923,11 @@ class BasicNewsRecipe(Recipe):
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
feed_index(i, toc.add_item('feed_%d/index.html'%i, None, f.title, play_order=po))
desc = f.description
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)

View File

@ -7,8 +7,8 @@ clarin.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Clarin(BasicNewsRecipe):
title = 'Clarin'
@ -22,6 +22,11 @@ class Clarin(BasicNewsRecipe):
no_stylesheets = True
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
remove_javascript = True
encoding = 'cp1252'
language = _('Spanish')
lang = 'es-AR'
direction = 'ltr'
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
html2lrf_options = [
'--comment', description
@ -29,7 +34,7 @@ class Clarin(BasicNewsRecipe):
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
remove_tags = [
dict(name='a' , attrs={'class':'Imp' })
@ -48,17 +53,20 @@ class Clarin(BasicNewsRecipe):
,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
]
def get_article_url(self, article):
artl = article.get('link', None)
rest = artl.partition('-0')[-1]
def print_version(self, url):
rest = url.partition('-0')[-1]
lmain = rest.partition('.')[0]
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
return lurl
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -10,17 +10,31 @@ from calibre.web.feeds.news import BasicNewsRecipe
class WallStreetJournal(BasicNewsRecipe):
title = 'The Wall Street Journal'
__author__ = 'Kovid Goyal'
__author__ = 'Kovid Goyal and Sujata Raman'
description = 'News and current affairs.'
needs_subscription = True
language = _('English')
max_articles_per_feed = 10
timefmt = ' [%a, %b %d, %Y]'
no_stylesheets = True
extra_css = '''h1{color:#093D72 ; font-size:large ; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; }
h2{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
.subhead{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
.insettipUnit {color:#666666; font-family:Arial,Sans-serif;font-size:xx-small }
.targetCaption{ font-size:x-small; color:#333333; font-family:Arial,Helvetica,sans-serif}
.article{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
.tagline {color:#333333; font-size:xx-small}
.dateStamp {color:#666666; font-family:Arial,Helvetica,sans-serif}
h3{color:blue ;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
.byline{color:blue;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
h6{color:#333333; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic; }
.paperLocation{color:#666666; font-size:xx-small}'''
remove_tags_before = dict(name='h1')
remove_tags = [
dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive"]),
{'class':['more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive","articleTabs_tab_video","articleTabs_tab_map","articleTabs_tab_slideshow"]),
{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map','insettip','more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
dict(rel='shortcut icon'),
]
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]