Sync to trunk.

This commit is contained in:
John Schember 2009-07-23 17:34:06 -04:00
commit 50b71bd449
30 changed files with 540 additions and 191 deletions

View File

@ -361,6 +361,8 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
return '&'+ent+';' return '&'+ent+';'
if ent == 'apos': if ent == 'apos':
return "'" return "'"
if ent == 'hellips':
ent = 'hellip'
if ent.startswith(u'#x'): if ent.startswith(u'#x'):
num = int(ent[2:], 16) num = int(ent[2:], 16)
if encoding is None or num > 255: if encoding is None or num > 255:
@ -382,6 +384,15 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
except KeyError: except KeyError:
return '&'+ent+';' return '&'+ent+';'
_ent_pat = re.compile(r'&(\S+);')
def prepare_string_for_xml(raw, attribute=False):
raw = _ent_pat.sub(entity_to_unicode, raw)
raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
if attribute:
raw = raw.replace('"', '&quot;').replace("'", '&apos;')
return raw
if isosx: if isosx:
fdir = os.path.expanduser('~/.fonts') fdir = os.path.expanduser('~/.fonts')
try: try:

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.6.0b16' __version__ = '0.6.0b17'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re

View File

@ -57,6 +57,35 @@ class HTMLRenderer(object):
self.loop.exit(0) self.loop.exit(0)
def extract_cover_from_embedded_svg(html, base, log):
from lxml import etree
from calibre.ebooks.oeb.base import XPath, SVG, XLINK
root = etree.fromstring(html)
svg = XPath('//svg:svg')(root)
if len(svg) == 1 and len(svg[0]) == 1 and svg[0][0].tag == SVG('image'):
image = svg[0][0]
href = image.get(XLINK('href'), None)
path = os.path.join(base, *href.split('/'))
if href and os.access(path, os.R_OK):
return open(path, 'rb').read()
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
from calibre.ebooks.oeb.base import SVG_NS
raw = open(path_to_html, 'rb').read()
data = None
if SVG_NS in raw:
try:
data = extract_cover_from_embedded_svg(raw,
os.path.dirname(path_to_html), log)
except:
pass
if data is None:
renderer = render_html(path_to_html, width, height)
data = getattr(renderer, 'data', None)
return data
def render_html(path_to_html, width=590, height=750): def render_html(path_to_html, width=590, height=750):
from PyQt4.QtWebKit import QWebPage from PyQt4.QtWebKit import QWebPage
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize

View File

@ -54,7 +54,7 @@ class EPUBInput(InputFormatPlugin):
return False return False
@classmethod @classmethod
def rationalize_cover(self, opf): def rationalize_cover(self, opf, log):
guide_cover, guide_elem = None, None guide_cover, guide_elem = None, None
for guide_elem in opf.iterguide(): for guide_elem in opf.iterguide():
if guide_elem.get('type', '').lower() == 'cover': if guide_elem.get('type', '').lower() == 'cover':
@ -65,28 +65,37 @@ class EPUBInput(InputFormatPlugin):
spine = list(opf.iterspine()) spine = list(opf.iterspine())
if not spine: if not spine:
return return
# Check if the cover specified in the guide is also
# the first element in spine
idref = spine[0].get('idref', '') idref = spine[0].get('idref', '')
manifest = list(opf.itermanifest()) manifest = list(opf.itermanifest())
if not manifest: if not manifest:
return return
if manifest[0].get('id', False) != idref: elem = [x for x in manifest if x.get('id', '') == idref]
if not elem or elem[0].get('href', None) != guide_cover:
return return
log('Found HTML cover', guide_cover)
# Remove from spine as covers must be treated
# specially
spine[0].getparent().remove(spine[0]) spine[0].getparent().remove(spine[0])
guide_elem.set('href', 'calibre_raster_cover.jpg') guide_elem.set('href', 'calibre_raster_cover.jpg')
from calibre.ebooks.oeb.base import OPF
t = etree.SubElement(elem[0].getparent(), OPF('item'),
href=guide_elem.get('href'), id='calibre_raster_cover')
t.set('media-type', 'image/jpeg')
for elem in list(opf.iterguide()): for elem in list(opf.iterguide()):
if elem.get('type', '').lower() == 'titlepage': if elem.get('type', '').lower() == 'titlepage':
elem.getparent().remove(elem) elem.getparent().remove(elem)
from calibre.ebooks.oeb.base import OPF
t = etree.SubElement(guide_elem.getparent(), OPF('reference')) t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
t.set('type', 'titlepage') t.set('type', 'titlepage')
t.set('href', guide_cover) t.set('href', guide_cover)
t.set('title', 'Title Page') t.set('title', 'Title Page')
from calibre.ebooks import render_html from calibre.ebooks import render_html_svg_workaround
renderer = render_html(guide_cover) renderer = render_html_svg_workaround(guide_cover, log)
if renderer is not None: if renderer is not None:
open('calibre_raster_cover.jpg', 'wb').write( open('calibre_raster_cover.jpg', 'wb').write(
renderer.data) renderer)
def convert(self, stream, options, file_ext, log, accelerators): def convert(self, stream, options, file_ext, log, accelerators):
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
@ -121,7 +130,7 @@ class EPUBInput(InputFormatPlugin):
for elem in opf.iterguide(): for elem in opf.iterguide():
elem.set('href', delta+elem.get('href')) elem.set('href', delta+elem.get('href'))
self.rationalize_cover(opf) self.rationalize_cover(opf, log)
with open('content.opf', 'wb') as nopf: with open('content.opf', 'wb') as nopf:
nopf.write(opf.render()) nopf.write(opf.render())

View File

@ -12,7 +12,7 @@ from urllib import unquote
from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__ from calibre.constants import __appname__, __version__
from calibre import strftime, guess_type from calibre import strftime, guess_type, prepare_string_for_xml
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from lxml import etree from lxml import etree
@ -210,6 +210,7 @@ class EPUBOutput(OutputFormatPlugin):
id, href = self.oeb.manifest.generate('calibre-logo', id, href = self.oeb.manifest.generate('calibre-logo',
'calibre-logo.png') 'calibre-logo.png')
self.oeb.manifest.add(id, href, 'image/png', data=img_data) self.oeb.manifest.add(id, href, 'image/png', data=img_data)
title, author = map(prepare_string_for_xml, (title, author))
html = self.TITLEPAGE%dict(title=title, author=author, html = self.TITLEPAGE%dict(title=title, author=author,
date=strftime('%d %b, %Y'), date=strftime('%d %b, %Y'),
app=__appname__ +' '+__version__, app=__appname__ +' '+__version__,

View File

@ -5,14 +5,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files''' '''Read meta information from epub files'''
import os, time import os
from cStringIO import StringIO from cStringIO import StringIO
from contextlib import closing from contextlib import closing
from PyQt4.Qt import QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
SIGNAL, QPainter, QImage, QObject, QApplication, Qt, QPalette
from PyQt4.QtWebKit import QWebPage
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
@ -102,64 +98,9 @@ class OCFDirReader(OCFReader):
def open(self, path, *args, **kwargs): def open(self, path, *args, **kwargs):
return open(os.path.join(self.root, path), *args, **kwargs) return open(os.path.join(self.root, path), *args, **kwargs)
class CoverRenderer(QObject):
WIDTH = 600
HEIGHT = 800
def __init__(self, path):
if QApplication.instance() is None:
QApplication([])
QObject.__init__(self)
self.loop = QEventLoop()
self.page = QWebPage()
pal = self.page.palette()
pal.setBrush(QPalette.Background, Qt.white)
self.page.setPalette(pal)
self.page.setViewportSize(QSize(self.WIDTH, self.HEIGHT))
self.page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
self.page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
self._image_data = None
self.rendered = False
url = QUrl.fromLocalFile(os.path.normpath(path))
self.page.mainFrame().load(url)
def render_html(self, ok):
try:
if not ok:
self.rendered = True
return
image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
image.setDotsPerMeterX(96*(100/2.54))
image.setDotsPerMeterY(96*(100/2.54))
painter = QPainter(image)
self.page.mainFrame().render(painter)
painter.end()
ba = QByteArray()
buf = QBuffer(ba)
buf.open(QBuffer.WriteOnly)
image.save(buf, 'JPEG')
self._image_data = str(ba.data())
finally:
self.loop.exit(0)
self.rendered = True
def image_data():
def fget(self):
if not self.rendered:
self.loop.exec_()
count = 0
while count < 50 and not self.rendered:
time.sleep(0.1)
count += 1
return self._image_data
return property(fget=fget)
image_data = image_data()
def get_cover(opf, opf_path, stream): def get_cover(opf, opf_path, stream):
from calibre.gui2 import is_ok_to_use_qt from calibre.ebooks import render_html_svg_workaround
if not is_ok_to_use_qt(): return None from calibre.utils.logging import default_log
spine = list(opf.spine_items()) spine = list(opf.spine_items())
if not spine: if not spine:
return return
@ -172,8 +113,7 @@ def get_cover(opf, opf_path, stream):
cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage) cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
if not os.path.exists(cpage): if not os.path.exists(cpage):
return return
cr = CoverRenderer(cpage) return render_html_svg_workaround(cpage, default_log)
return cr.image_data
def get_metadata(stream, extract_cover=True): def get_metadata(stream, extract_cover=True):
""" Return metadata as a :class:`MetaInformation` object """ """ Return metadata as a :class:`MetaInformation` object """

View File

@ -443,7 +443,7 @@ class MobiReader(object):
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>' self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
self.processed_html = self.processed_html.replace('\r\n', '\n') self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace('> <', '>\n<')
self.processed_html = re.sub('\x14|\x15|\x1c|\x1d', '', self.processed_html) self.processed_html = re.sub('\x14|\x15|\x1c|\x1d|\xef|\x12|\x13|\xec', '', self.processed_html)
def ensure_unit(self, raw, unit='px'): def ensure_unit(self, raw, unit='px'):
if re.search(r'\d+$', raw) is not None: if re.search(r'\d+$', raw) is not None:

View File

@ -1556,7 +1556,8 @@ class MobiWriter(object):
else: else:
raise NotImplementedError("missing date or timestamp needed for mobi_periodical") raise NotImplementedError("missing date or timestamp needed for mobi_periodical")
if oeb.metadata.cover: if oeb.metadata.cover and \
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids:
id = unicode(oeb.metadata.cover[0]) id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[id] item = oeb.manifest.ids[id]
href = item.href href = item.href

View File

@ -759,6 +759,15 @@ class Manifest(object):
return u'Item(id=%r, href=%r, media_type=%r)' \ return u'Item(id=%r, href=%r, media_type=%r)' \
% (self.id, self.href, self.media_type) % (self.id, self.href, self.media_type)
def _parse_xml(self, data):
try:
return etree.fromstring(data)
except etree.XMLSyntaxError, err:
if getattr(err, 'code', 0) == 26 or str(err).startswith('Entity'):
data = xml_to_unicode(data, strip_encoding_pats=True,
resolve_entities=True)[0]
return etree.fromstring(data)
def _parse_xhtml(self, data): def _parse_xhtml(self, data):
self.oeb.log.debug('Parsing', self.href, '...') self.oeb.log.debug('Parsing', self.href, '...')
# Convert to Unicode and normalize line endings # Convert to Unicode and normalize line endings
@ -952,7 +961,7 @@ class Manifest(object):
elif self.media_type.lower() in OEB_DOCS: elif self.media_type.lower() in OEB_DOCS:
data = self._parse_xhtml(data) data = self._parse_xhtml(data)
elif self.media_type.lower()[-4:] in ('+xml', '/xml'): elif self.media_type.lower()[-4:] in ('+xml', '/xml'):
data = etree.fromstring(data) data = self._parse_xml(data)
elif self.media_type.lower() in OEB_STYLES: elif self.media_type.lower() in OEB_STYLES:
data = self._parse_css(data) data = self._parse_css(data)
elif 'text' in self.media_type.lower(): elif 'text' in self.media_type.lower():

View File

@ -27,7 +27,6 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
OEBError, OEBBook, DirContainer OEBError, OEBBook, DirContainer
from calibre.ebooks.oeb.writer import OEBWriter from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
from calibre.ebooks.metadata.epub import CoverRenderer
from calibre.startup import get_lang from calibre.startup import get_lang
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__ from calibre.constants import __appname__, __version__
@ -343,8 +342,11 @@ class OEBReader(object):
descriptionElement = xpath(child, descriptionElement = xpath(child,
'descendant::calibre:meta[@name = "description"]') 'descendant::calibre:meta[@name = "description"]')
if descriptionElement : if descriptionElement:
description = descriptionElement[0].text description = etree.tostring(descriptionElement[0],
method='text', encoding=unicode).strip()
if not description:
description = None
else : else :
description = None description = None
@ -524,12 +526,14 @@ class OEBReader(object):
return return
def _cover_from_html(self, hcover): def _cover_from_html(self, hcover):
from calibre.ebooks import render_html_svg_workaround
with TemporaryDirectory('_html_cover') as tdir: with TemporaryDirectory('_html_cover') as tdir:
writer = OEBWriter() writer = OEBWriter()
writer(self.oeb, tdir) writer(self.oeb, tdir)
path = os.path.join(tdir, urlunquote(hcover.href)) path = os.path.join(tdir, urlunquote(hcover.href))
renderer = CoverRenderer(path) data = render_html_svg_workaround(path, self.logger)
data = renderer.image_data if not data:
data = ''
id, href = self.oeb.manifest.generate('cover', 'cover.jpeg') id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data) item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
return item return item

View File

@ -97,6 +97,8 @@ class MergeMetadata(object):
id = old_cover = None id = old_cover = None
if 'cover' in self.oeb.guide: if 'cover' in self.oeb.guide:
old_cover = self.oeb.guide['cover'] old_cover = self.oeb.guide['cover']
if prefer_metadata_cover and old_cover is not None:
cdata = ''
if cdata: if cdata:
self.oeb.guide.remove('cover') self.oeb.guide.remove('cover')
self.oeb.guide.remove('titlepage') self.oeb.guide.remove('titlepage')
@ -106,6 +108,10 @@ class MergeMetadata(object):
if not cdata: if not cdata:
return item.id return item.id
self.oeb.manifest.remove(item) self.oeb.manifest.remove(item)
elif not cdata:
id = self.oeb.manifest.generate(id='cover')
self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
return id
if cdata: if cdata:
id, href = self.oeb.manifest.generate('cover', 'cover.jpg') id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata) self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata)

View File

@ -301,30 +301,26 @@ class FlowSplitter(object):
# Tree 1 # Tree 1
hit_split_point = False hit_split_point = False
for elem in list(body.iterdescendants(etree.Element)): for elem in list(body.iterdescendants()):
if elem is split_point: if elem is split_point:
hit_split_point = True hit_split_point = True
if before: if before:
x = elem.get('id', None)
nix_element(elem) nix_element(elem)
continue continue
if hit_split_point: if hit_split_point:
x = elem.get('id', None)
nix_element(elem) nix_element(elem)
# Tree 2 # Tree 2
hit_split_point = False hit_split_point = False
for elem in list(body2.iterdescendants(etree.Element)): for elem in list(body2.iterdescendants()):
if elem is split_point2: if elem is split_point2:
hit_split_point = True hit_split_point = True
if not before: if not before:
x = elem.get('id', None)
nix_element(elem, top=False) nix_element(elem, top=False)
continue continue
if not hit_split_point: if not hit_split_point:
x = elem.get('id', None)
nix_element(elem, top=False) nix_element(elem, top=False)
body2.text = '\n' body2.text = '\n'

View File

@ -53,7 +53,7 @@
<item row="2" column="0"> <item row="2" column="0">
<widget class="QLabel" name="label_8"> <widget class="QLabel" name="label_8">
<property name="text"> <property name="text">
<string>Author S&amp;ort: </string> <string>Author s&amp;ort: </string>
</property> </property>
<property name="alignment"> <property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set> <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
@ -118,7 +118,7 @@
<item row="5" column="0"> <item row="5" column="0">
<widget class="QLabel" name="label_4"> <widget class="QLabel" name="label_4">
<property name="text"> <property name="text">
<string>Add Ta&amp;gs: </string> <string>Add ta&amp;gs: </string>
</property> </property>
<property name="alignment"> <property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set> <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>

View File

@ -392,7 +392,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.tags.update_tags_cache(self.db.all_tags()) self.tags.update_tags_cache(self.db.all_tags())
def fetch_cover(self): def fetch_cover(self):
isbn = unicode(self.isbn.text()).strip() isbn = re.sub(r'[^0-9a-zA-Z]', '', unicode(self.isbn.text())).strip()
self.fetch_cover_button.setEnabled(False) self.fetch_cover_button.setEnabled(False)
self.setCursor(Qt.WaitCursor) self.setCursor(Qt.WaitCursor)
title, author = map(unicode, (self.title.text(), self.authors.text())) title, author = map(unicode, (self.title.text(), self.authors.text()))
@ -510,7 +510,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
aus = qstring_to_unicode(self.author_sort.text()) aus = qstring_to_unicode(self.author_sort.text())
if aus: if aus:
self.db.set_author_sort(self.id, aus, notify=False) self.db.set_author_sort(self.id, aus, notify=False)
self.db.set_isbn(self.id, qstring_to_unicode(self.isbn.text()), notify=False) self.db.set_isbn(self.id,
re.sub(r'[^0-9a-zA-Z]', '', unicode(self.isbn.text())), notify=False)
self.db.set_rating(self.id, 2*self.rating.value(), notify=False) self.db.set_rating(self.id, 2*self.rating.value(), notify=False)
self.db.set_publisher(self.id, qstring_to_unicode(self.publisher.currentText()), notify=False) self.db.set_publisher(self.id, qstring_to_unicode(self.publisher.currentText()), notify=False)
self.db.set_tags(self.id, qstring_to_unicode(self.tags.text()).split(','), notify=False) self.db.set_tags(self.id, qstring_to_unicode(self.tags.text()).split(','), notify=False)

View File

@ -1873,13 +1873,19 @@ def main(args=sys.argv):
return run_gui(opts, args, actions, listener, app) return run_gui(opts, args, actions, listener, app)
else: else:
return run_gui(opts, args, actions, listener, app) return run_gui(opts, args, actions, listener, app)
otherinstance = False
try: try:
listener = Listener(address=ADDRESS) listener = Listener(address=ADDRESS)
except socket.error: # Good si is correct except socket.error: # Good si is correct (on UNIX)
communicate(args) otherinstance = True
else: else:
# On windows only singleinstance can be trusted
otherinstance = True if iswindows else False
if not otherinstance:
return run_gui(opts, args, actions, listener, app) return run_gui(opts, args, actions, listener, app)
communicate(args)
return 0 return 0

View File

@ -20,8 +20,8 @@ What formats does |app| support conversion to/from?
|app| supports the conversion of many input formats to many output formats. |app| supports the conversion of many input formats to many output formats.
It can convert every input format in the following list, to every output format. It can convert every input format in the following list, to every output format.
*Input Formats:* CBZ, CBR, CBC, EPUB, FB2, HTML, LIT, MOBI, ODT, PDF, PRC**, RTF, TXT *Input Formats:* CBZ, CBR, CBC, EPUB, FB2, HTML, LIT, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, TXT
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PDF, TXT *Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, TXT
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers ** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers

View File

@ -1,7 +1,7 @@
''' '''
Trac Macro to generate an end use Changelog from the svn logs. Trac Macro to generate an end use Changelog from the svn logs.
''' '''
import re, collections, time import re, collections, time, os
from bzrlib import log as blog, branch from bzrlib import log as blog, branch
@ -12,12 +12,13 @@ from trac.wiki.macros import WikiMacroBase
from trac.util import Markup from trac.util import Markup
BZR_PATH = '/var/bzr/code/calibre/trunk' BZR_PATH = '/usr/local/calibre'
class ChangelogFormatter(blog.LogFormatter): class ChangelogFormatter(blog.LogFormatter):
supports_tags = True supports_tags = True
supports_merge_revisions = False supports_merge_revisions = False
_show_advice = False
def __init__(self, num_of_versions=20): def __init__(self, num_of_versions=20):
self.num_of_versions = num_of_versions self.num_of_versions = num_of_versions
@ -47,13 +48,19 @@ class ChangelogFormatter(blog.LogFormatter):
txt = ['= Changelog =\n[[PageOutline]]'] txt = ['= Changelog =\n[[PageOutline]]']
for entry in self.entries: for entry in self.entries:
txt.append(u'----\n== Version '+entry[0]+' ==') txt.append(u'----\n== Version '+entry[0]+' ==')
if entry[0] == '0.6.0':
txt.append(u'For a list of new features in 0.6.0 see http://calibre.kovidgoyal.net/new_in_6')
else:
for msg in entry[1]: for msg in entry[1]:
txt.append(u' * ' + msg) txt.append(u' * ' + msg)
return u'\n'.join(txt) return u'\n'.join(txt)
def bzr_log_to_txt(): def bzr_log_to_txt():
b = branch.Branch.open(BZR_PATH) path = BZR_PATH
if not os.path.exists(path):
path = '/home/kovid/work/calibre'
b = branch.Branch.open(path)
lf = ChangelogFormatter() lf = ChangelogFormatter()
blog.show_log(b, lf) blog.show_log(b, lf)
return lf.to_wiki_txt() return lf.to_wiki_txt()
@ -68,6 +75,6 @@ class ChangeLogMacro(WikiMacroBase):
if __name__ == '__main__': if __name__ == '__main__':
print bzr_log_to_txt() print bzr_log_to_txt().encode('utf-8')

View File

@ -4,9 +4,9 @@
# #
msgid "" msgid ""
msgstr "" msgstr ""
"Project-Id-Version: calibre 0.6.0b14\n" "Project-Id-Version: calibre 0.6.0b16\n"
"POT-Creation-Date: 2009-07-19 12:31+MDT\n" "POT-Creation-Date: 2009-07-22 07:39+MDT\n"
"PO-Revision-Date: 2009-07-19 12:31+MDT\n" "PO-Revision-Date: 2009-07-22 07:39+MDT\n"
"Last-Translator: Automatically generated\n" "Last-Translator: Automatically generated\n"
"Language-Team: LANGUAGE\n" "Language-Team: LANGUAGE\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
@ -69,8 +69,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:136 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:136
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:138 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:138
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:84 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:84
#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:101 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:103
#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:102 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:104
#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/input.py:26 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/input.py:26
#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/palmdoc/writer.py:29 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/palmdoc/writer.py:29
#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ztxt/writer.py:27 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ztxt/writer.py:27
@ -107,7 +107,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf.py:48 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf.py:48
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:106 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:106
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:139 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:139
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:345 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:348
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:34 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:34
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:39 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:39
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:40 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:40
@ -126,8 +126,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1430 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1430
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1514 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1514
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1599 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1599
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1622 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1621
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1673 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1672
#: /home/kovid/work/calibre/src/calibre/library/server.py:294 #: /home/kovid/work/calibre/src/calibre/library/server.py:294
#: /home/kovid/work/calibre/src/calibre/library/server.py:355 #: /home/kovid/work/calibre/src/calibre/library/server.py:355
#: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:45 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:45
@ -1384,6 +1384,11 @@ msgid ""
"Fetch a cover image for the book identified by ISBN from LibraryThing.com\n" "Fetch a cover image for the book identified by ISBN from LibraryThing.com\n"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1053
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
msgid "Cover"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:22 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:22
msgid "Modify images to meet Palm device size limitations." msgid "Modify images to meet Palm device size limitations."
msgstr "" msgstr ""
@ -1405,14 +1410,10 @@ msgstr ""
msgid "Disable compression of the file contents." msgid "Disable compression of the file contents."
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:101 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:103
msgid "All articles" msgid "All articles"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
msgid "Cover"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1261 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1261
msgid "Title Page" msgid "Title Page"
msgstr "" msgstr ""
@ -3662,50 +3663,50 @@ msgstr ""
msgid "The cover in the %s format is invalid" msgid "The cover in the %s format is invalid"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:402 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:405
msgid "Downloading cover..." msgid "Downloading cover..."
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:414 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:417
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:419 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:422
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:425 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:428
msgid "Cannot fetch cover" msgid "Cannot fetch cover"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:415 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:418
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:426 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:429
msgid "<b>Could not fetch cover.</b><br/>" msgid "<b>Could not fetch cover.</b><br/>"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:416 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:419
msgid "The download timed out." msgid "The download timed out."
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:420 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:423
msgid "Could not find cover for this book. Try specifying the ISBN first." msgid "Could not find cover for this book. Try specifying the ISBN first."
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:432 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:435
msgid "Bad cover" msgid "Bad cover"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:433 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:436
msgid "The cover is not a valid picture" msgid "The cover is not a valid picture"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:472 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:475
msgid "Cannot fetch metadata" msgid "Cannot fetch metadata"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:473 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:476
msgid "You must specify at least one of ISBN, Title, Authors or Publisher" msgid "You must specify at least one of ISBN, Title, Authors or Publisher"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:499 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:502
msgid "Permission denied" msgid "Permission denied"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:500 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:503
msgid "Could not open %s. Is it being used by another program?" msgid "Could not open %s. Is it being used by another program?"
msgstr "" msgstr ""

View File

@ -102,3 +102,5 @@ class Log(object):
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
self.prints(INFO, *args, **kwargs) self.prints(INFO, *args, **kwargs)
default_log = Log()

View File

@ -52,7 +52,7 @@ recipe_modules = ['recipe_' + r for r in (
'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres', 'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate', 'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',
'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna', 'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',
'eltiempo_hn', 'eltiempo_hn', 'slate',
)] )]

View File

@ -12,6 +12,7 @@ class AlJazeera(BasicNewsRecipe):
title = 'Al Jazeera in English' title = 'Al Jazeera in English'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'News from Middle East' description = 'News from Middle East'
language = _('English')
publisher = 'Al Jazeera' publisher = 'Al Jazeera'
category = 'news, politics, middle east' category = 'news, politics, middle east'
simultaneous_downloads = 1 simultaneous_downloads = 1

View File

@ -12,6 +12,7 @@ class Azstarnet(BasicNewsRecipe):
title = 'Arizona Daily Star' title = 'Arizona Daily Star'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'news from Arizona' description = 'news from Arizona'
language = _('English')
publisher = 'azstarnet.com' publisher = 'azstarnet.com'
category = 'news, politics, Arizona, USA' category = 'news, politics, Arizona, USA'
delay = 1 delay = 1

View File

@ -14,6 +14,7 @@ class CodingHorror(BasicNewsRecipe):
description = 'programming and human factors - Jeff Atwood' description = 'programming and human factors - Jeff Atwood'
category = 'blog, programming' category = 'blog, programming'
publisher = 'Jeff Atwood' publisher = 'Jeff Atwood'
language = _('English')
author = 'Jeff Atwood' author = 'Jeff Atwood'
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -16,6 +16,7 @@ class Sueddeutsche(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
timefmt = ' [%a %d %b %Y]' timefmt = ' [%a %d %b %Y]'
max_articles_per_feed = 50 max_articles_per_feed = 50
language = _('English')
no_stylesheets = True no_stylesheets = True
html2epub_options = 'linearize_tables = True\nbase_font_size2=14' html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
html2lrf_options = ['--ignore-tables'] html2lrf_options = ['--ignore-tables']

View File

@ -13,6 +13,7 @@ class MoneyNews(BasicNewsRecipe):
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Financial news worldwide' description = 'Financial news worldwide'
publisher = 'moneynews.com' publisher = 'moneynews.com'
language = _('English')
category = 'news, finances, USA, business' category = 'news, finances, USA, business'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -17,7 +17,7 @@ class Publico(BasicNewsRecipe):
max_articles_per_feed = 30 max_articles_per_feed = 30
encoding='utf-8' encoding='utf-8'
no_stylesheets = True no_stylesheets = True
language = _('Portuguese') language = _('Portugese')
preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE), lambda match: ''),] preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE), lambda match: ''),]
feeds = [ feeds = [

View File

@ -15,6 +15,7 @@ class ScottHanselman(BasicNewsRecipe):
category = "Scott, Computer, Zen, .NET, C#, Hanselman, Scott, Weblog, Diabetes, Portland, Zimbabwe, ComputerZen.com - Scott Hanselman's Musings" category = "Scott, Computer, Zen, .NET, C#, Hanselman, Scott, Weblog, Diabetes, Portland, Zimbabwe, ComputerZen.com - Scott Hanselman's Musings"
publisher = 'Scott Hanselman' publisher = 'Scott Hanselman'
author = 'Scott Hanselman' author = 'Scott Hanselman'
language = _('English')
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True

View File

@ -0,0 +1,330 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetches the last 7 days of featured articles from slate.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
class Slate(BasicNewsRecipe):
    """Recipe for slate.com: fetches the last 7 days of featured articles."""
    # Method variables for customizing downloads
    title = 'Slate'
    description = 'A daily magazine on the Web, offering analysis and commentary about politics, news and culture.'
    __author__ = 'GRiker@hotmail.com'
    language = _('English')
    max_articles_per_feed = 40
    oldest_article = 7.0        # days
    recursions = 0
    delay = 0
    simultaneous_downloads = 5
    timeout = 120.0             # seconds
    timefmt = ''
    feeds = None                # index is built by parse_index(), not from RSS feeds
    no_stylesheets = True
    encoding = None
    # Method variables for customizing feed parsing
    summary_length = 250
    use_embedded_content = None
    # Method variables for pre/post processing of HTML:
    # strip navigation, ads, comment threads and other non-article chrome.
    remove_tags = [ dict(name=['link','style']),
        dict(id=['toolbox','site_navigation','article_bottom_tools_cntr',
            'article_bottom_tools','recommend_tab2','bottom_sponsored_links',
            'fray_article_discussion','bizbox_sponsored_links_bottom',
            'page_rightcol','top_banner','also_in_slate_bottom','articlefooter',
            'article_top_wedge','content-top','page-title',
            'block-today039s-business-press-archives','block-blog-roll',
            'block-also-in-tbm','block-most-popular-on-tbm','block-the-best-of-tbm',
            'service-links-bottom','comments','ft']),
        dict(attrs={'class':['fray_article_links','clearing','nav',
            'service-links service-links-stack','yui-b last',
            'read-more-comments']})]
    extra_css = '.headline {text-align:left;}\n\
.byline {font:monospace; text-align:left; margin-bottom:0pt;}\n\
.dateline {text-align:left; height:0pt;}\n\
.source {align:left;}\n\
.credit {text-align:right;font-size:smaller;}\n'
    baseURL = 'http://slate.com'
    # NOTE(review): mutable class attribute — shared across instances, and
    # grows on every call to extract_sections(); presumably a recipe object
    # is used once per download. TODO confirm.
    section_dates = []
def tag_to_strings(self, tag):
    """Return the text fragments of *tag*'s direct children as a list.

    Mirrors the loose contract of ``tag_to_string``: falsy input yields
    ``''`` and a plain string is handed back unchanged.  Text nodes
    contribute their raw string; child tags contribute their flattened
    text (via ``tag_to_string``) when it is non-empty.
    """
    if not tag:
        return ''
    if isinstance(tag, basestring):
        return tag
    collected = []
    for node in tag.contents:
        if isinstance(node, (NavigableString, CData)):
            collected.append(node.string)
            continue
        if isinstance(node, Tag):
            text = self.tag_to_string(node)
            if text:
                collected.append(text)
    return collected
def extract_sections(self):
    """Fetch the Slate front page and return its per-day <ul> section lists.

    Side effect: populates ``self.section_dates`` with one date string per
    returned section (consumed later by ``extract_section_articles``).
    """
    soup = self.index_to_soup( self.baseURL )
    # Headline block is outside the main table-of-contents container.
    soup_top_stories = soup.find(True, attrs={'class':'tap2_topic entry-content'})
    soup = soup.find(True, attrs={'id':'toc_links_container'})
    todays_section = soup.find(True, attrs={'class':'todaydateline'})
    # Today's date is appended twice — presumably so that both the headline
    # block and today's own list map to the same date. TODO confirm against
    # the live page structure.
    self.section_dates.append(self.tag_to_string(todays_section,use_alt=False))
    self.section_dates.append(self.tag_to_string(todays_section,use_alt=False))
    older_section_dates = soup.findAll(True, attrs={'class':'maindateline'})
    for older_section in older_section_dates :
        self.section_dates.append(self.tag_to_string(older_section,use_alt=False))
    headline_stories = soup_top_stories.find('ul')
    section_lists = soup.findAll('ul')
    # Prepend the headlines to the first section
    section_lists[0].insert(0,headline_stories)
    sections = []
    for section in section_lists :
        sections.append(section)
    return sections
def extract_section_articles(self, sections_html) :
    """Parse the section <ul> lists into ``[(section_name, [article_dict, ...]), ...]``.

    Each article dict has keys title/url/date/description/author/content.
    Articles matching the exclusion keyword lists (Slate V, podcasts,
    Gabfest, Dear Prudence, ...) are skipped, duplicates within a section
    are dropped, and "Today's Papers" style summaries are promoted to the
    top of their section.
    """
    soup = self.index_to_soup(str(sections_html))
    sections = soup.findAll('ul')
    articles = {}
    key = None
    ans = []
    for (i,section) in enumerate(sections) :
        # Get the section name: only <ul> elements carrying an id are real
        # sections; their date was recorded by extract_sections() at the
        # same index.
        if section.has_key('id') :
            key = self.section_dates[i]
            articles[key] = []
            ans.append(key)
        else :
            continue
        # Get the section article_list
        article_list = section.findAll('li')
        excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast']
        excludedTitleKeywords = ['Gabfest','Slate V']
        excludedAuthorKeywords = ['Prudence']
        # Extract the article attributes
        for article in article_list :
            # bylines: [title, description?, author?, ...] as produced by
            # tag_to_strings — layout-dependent, hence the length checks.
            bylines = self.tag_to_strings(article)
            url = article.a['href']
            title = bylines[0]
            full_title = self.tag_to_string(article)
            author = None
            description = None
            pubdate = None
            if len(bylines) == 2 and self.tag_to_string(article).find("Today's Papers") > 0 :
                description = "A summary of what's in the major U.S. newspapers."
            if len(bylines) == 3 :
                author = bylines[2].strip()
                # Strip the newline/tab runs and commas the markup leaves in
                # the author string.
                author = re.sub('[\r][\n][\t][\t\t]','', author)
                author = re.sub(',','', author)
                if bylines[1] is not None :
                    description = bylines[1]
                    full_byline = self.tag_to_string(article)
                    if full_byline.find('major U.S. newspapers') > 0 :
                        description = "A summary of what's in the major U.S. newspapers."
            if len(bylines) > 3 and author is not None:
                # Multiple contributors: join the remaining fragments with " | ".
                author += " | "
                for (i,substring) in enumerate(bylines[3:]) :
                    #print "substring: %s" % substring.encode('cp1252')
                    author += substring.strip()
                    if i < len(bylines[3:]) :
                        author += " | "
            # Skip articles whose descriptions contain excluded keywords
            if description is not None :
                excluded = re.compile('|'.join(excludedDescriptionKeywords))
                found_excluded = excluded.search(description)
                if found_excluded :
                    continue
            # Skip articles whose title contain excluded keywords
            if full_title is not None :
                excluded = re.compile('|'.join(excludedTitleKeywords))
                #self.log("evaluating full_title: %s" % full_title)
                found_excluded = excluded.search(full_title)
                if found_excluded :
                    continue
            # Skip articles whose author contain excluded keywords
            if author is not None :
                excluded = re.compile('|'.join(excludedAuthorKeywords))
                found_excluded = excluded.search(author)
                if found_excluded :
                    continue
            skip_this_article = False
            # Check to make sure we're not adding a duplicate
            for article in articles[key] :
                if article['url'] == url :
                    skip_this_article = True
                    break
            if skip_this_article :
                continue
            # Build the dictionary entry for this article
            feed = key
            if not articles.has_key(feed) :
                articles[feed] = []
            articles[feed].append(dict(title=title, url=url, date=pubdate, description=description,
                author=author, content=''))
            # Promote 'newspapers' to top
            for (i,article) in enumerate(articles[feed]) :
                if article['description'] is not None :
                    if article['description'].find('newspapers') > 0 :
                        articles[feed].insert(0,articles[feed].pop(i))
    ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
    ans = self.remove_duplicates(ans)
    return ans
def flatten_document(self, ans):
    """Collapse a multi-section index into a single 'All Articles' section.

    *ans* is a sequence of ``(section_name, article_list)`` pairs; the
    section names are discarded and every article list is concatenated in
    order.
    """
    merged = []
    for entry in ans:
        merged.extend(entry[1])
    return [['All Articles', merged]]
def remove_duplicates(self, ans):
    """Drop articles from later sections whose URL appeared in an earlier section.

    *ans* is a sequence of ``(section_name, article_list)`` pairs; the
    article lists are filtered in place and *ans* is returned.  Duplicates
    *within* a single section are left alone (they are handled when the
    section is built), matching the original behaviour of only scanning
    subsequent sections.

    Bug fix: the previous implementation did ``del`` on a list while
    ``enumerate``-ing that same list, which skips the element following
    every deletion — consecutive duplicates therefore survived.
    """
    seen_urls = set()
    for section in ans:
        article_list = section[1]
        # Keep only articles whose URL has not been published by an
        # *earlier* section; slice-assign so the list object (shared with
        # the articles dict) is mutated in place, as before.
        article_list[:] = [a for a in article_list if a['url'] not in seen_urls]
        seen_urls.update(a['url'] for a in article_list)
    return ans
def print_version(self, url):
    """Map an article URL to its single-page printable form."""
    return '%spagenum/all/' % url
# Class methods
def parse_index(self):
    """Entry point: build and return the flattened (section, articles) index."""
    section_html = self.extract_sections()
    indexed = self.extract_section_articles(section_html)
    return self.flatten_document(indexed)
def postprocess_html(self, soup, first_fetch) :
    """Normalize a fetched article page: restyle the department kicker as
    <h3><em>, demote the <h1> headline to a styled <h2>, split the
    run-together byline/dateline into separate divs, and italicize photo
    captions with a trailing <hr>.  Returns the modified soup.
    """
    # Fix up dept_kicker as <h3><em>
    dept_kicker = soup.find(True, attrs={'class':'department_kicker'})
    if dept_kicker is not None :
        kicker_strings = self.tag_to_strings(dept_kicker)
        kicker = kicker_strings[2] + kicker_strings[3]
        # Bug fix: re.sub('.','',kicker) used an unescaped '.', which
        # matches ANY character and emptied the kicker entirely.  The
        # intent is to strip literal periods only.
        kicker = kicker.replace('.', '')
        h3Tag = Tag(soup, "h3")
        emTag = Tag(soup, "em")
        h3Tag.insert(0, emTag)
        emTag.insert(0,kicker)
        dept_kicker.replaceWith(h3Tag)
    # Change <h1> to <h2>, joining the headline fragments with <br />
    headline = soup.find("h1")
    if headline is not None :
        h2tag = Tag(soup, "h2")
        h2tag['class'] = "headline"
        strs = self.tag_to_strings(headline)
        result = ''
        for (i,substr) in enumerate(strs) :
            result += substr
            if i < len(strs) -1 :
                result += '<br />'
        h2tag.insert(0, result)
        headline.replaceWith(h2tag)
    # Fix up the concatenated byline and dateline
    byline = soup.find(True,attrs={'class':'byline'})
    if byline is not None :
        bylineTag = Tag(soup,'div')
        bylineTag['class'] = 'byline'
        bylineTag.insert(0,self.tag_to_string(byline))
        byline.replaceWith(bylineTag)
    dateline = soup.find(True, attrs={'class':'dateline'})
    if dateline is not None :
        datelineTag = Tag(soup, 'div')
        datelineTag['class'] = 'dateline'
        datelineTag.insert(0,self.tag_to_string(dateline))
        dateline.replaceWith(datelineTag)
    # Change captions to italic, add <hr>
    for caption in soup.findAll(True, {'class':'caption'}) :
        if caption is not None:
            emTag = Tag(soup, "em")
            emTag.insert(0, '<br />' + self.tag_to_string(caption))
            hrTag = Tag(soup, 'hr')
            emTag.insert(1, hrTag)
            caption.replaceWith(emTag)
    return soup
def postprocess_book(self, oeb, opts, log) :
    """Fill in missing author/description metadata on TOC entries by
    re-parsing each article's HTML from the OEB manifest.

    Handles both a flat TOC (depth 2: articles directly under root) and a
    sectioned TOC (depth 3: root -> sections -> articles).
    """
    def extract_byline(href) :
        # Pull the first element with class 'byline' from the article HTML,
        # or None when the page has no byline.
        soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
        byline = soup.find(True,attrs={'class':'byline'})
        if byline is not None:
            return self.tag_to_string(byline,use_alt=False)
        else :
            return None
    def extract_description(href) :
        # First paragraph that is not a 'By ...'/'Posted ...' line becomes
        # the description (truncated to 200 chars); inline image wrappers
        # are removed so only text remains.
        soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
        paragraphs = soup.findAll('p')
        for p in paragraphs :
            if self.tag_to_string(p,use_alt=False).startswith('By ') or \
               self.tag_to_string(p,use_alt=False).startswith('Posted '):
                continue
            images = p.findAll(True, attrs={'class':'imagewrapper'})
            for image in images :
                image.extract()
            return self.tag_to_string(p,use_alt=False)[:200] + '...'
        return None
    if oeb.toc.depth() == 2 :
        for article in oeb.toc :
            if article.author is None :
                article.author = extract_byline(article.href)
            if article.description is None :
                article.description = extract_description(article.href)
    elif oeb.toc.depth() == 3 :
        for section in oeb.toc :
            for article in section :
                if article.author is None :
                    article.author = extract_byline(article.href)
                if article.description is None :
                    article.description = extract_description(article.href)

View File

@ -15,6 +15,7 @@ class StackOverflowBlog(BasicNewsRecipe):
category = 'blog, programming' category = 'blog, programming'
publisher = 'StackOverflow team' publisher = 'StackOverflow team'
oldest_article = 30 oldest_article = 30
language = _('English')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = True use_embedded_content = True

View File

@ -2,35 +2,24 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Wired(BasicNewsRecipe): class Wired(BasicNewsRecipe):
title = 'Wired.com' title = 'Wired.com'
__author__ = 'David Chen <SonyReader<at>DaveChen<dot>org>' __author__ = 'Kovid Goyal'
description = 'Technology news' description = 'Technology news'
timefmt = ' [%Y%b%d %H%M]' timefmt = ' [%Y%b%d %H%M]'
language = _('English') language = _('English')
no_stylesheets = True no_stylesheets = True
#html2lrf_options = ['--base-font-size', '16']
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
[ 'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
## Remove any banners/links/ads/cruft before the body of the article. {'class':'entryActions'},
(r'<body.*?((<div id="article_body">)|(<div id="st-page-maincontent">)|(<div id="containermain">)|(<p class="ap-story-p">)|(<!-- img_nav -->))', lambda match: '<body><div>'), dict(name=['noscript', 'script'])]
## Remove any links/ads/comments/cruft from the end of the body of the article.
(r'((<!-- end article content -->)|(<div id="st-custom-afterpagecontent">)|(<p class="ap-story-p">&copy;)|(<div class="entry-footer">)|(<div id="see_also">)|(<p>Via <a href=)|(<div id="ss_nav">)).*?</html>', lambda match : '</div></body></html>'),
## Correctly embed in-line images by removing the surrounding javascript that will be ignored in the conversion
(r'<a.*?onclick.*?>.*?(<img .*?>)', lambda match: match.group(1),),
]
]
feeds = [ feeds = [
('Top News', 'http://feeds.wired.com/wired/index'), ('Top News', 'http://feeds.wired.com/wired/index'),