Sync to trunk.

2025-06-23 15:30:45 -04:00 · 2009-07-23 17:34:06 -04:00 · 2009-07-23 17:34:06 -04:00 · 50b71bd449
commit 50b71bd449
parent ecd4405e7d 3829b4c20f
30 changed files with 540 additions and 191 deletions
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -361,6 +361,8 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
        return '&'+ent+';'
    if ent == 'apos':
        return "'"
+    if ent == 'hellips':
+        ent = 'hellip'
    if ent.startswith(u'#x'):
        num = int(ent[2:], 16)
        if encoding is None or num > 255:
@ -382,6 +384,15 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
    except KeyError:
        return '&'+ent+';'

+# Non-greedy, so that adjacent entities such as '&lt;b&gt;' are matched one
+# at a time instead of being swallowed as a single bogus entity name.
+_ent_pat = re.compile(r'&(\S+?);')
+
+def prepare_string_for_xml(raw, attribute=False):
+    '''
+    Return `raw` escaped for inclusion in XML.
+
+    Entity references in `raw` are first passed through
+    :func:`entity_to_unicode`, so that text which is already escaped is not
+    escaped a second time. Then ``&``, ``<`` and ``>`` are replaced by their
+    XML entities.
+
+    :param raw: The string to escape
+    :param attribute: If True, also escape ``"`` and ``'`` so the result can
+                      be used inside an attribute value
+    :return: The escaped string
+    '''
+    raw = _ent_pat.sub(entity_to_unicode, raw)
+    raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+    if attribute:
+        raw = raw.replace('"', '&quot;').replace("'", '&apos;')
+    return raw
+
 if isosx:
    fdir = os.path.expanduser('~/.fonts')
    try:
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.6.0b16'
+__version__   = '0.6.0b17'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -57,6 +57,35 @@ class HTMLRenderer(object):
            self.loop.exit(0)


+def extract_cover_from_embedded_svg(html, base, log):
+    '''
+    Return the contents of the image file referenced by an SVG cover page.
+
+    Only handles the case where the document contains exactly one <svg>
+    element whose only child is an <image> element. The image's xlink:href
+    is split on '/' and resolved against `base`.
+
+    :param html: The markup of the page, parsed with etree.fromstring()
+    :param base: Directory against which the image href is resolved
+    :param log: Not used by this function
+    :return: The raw bytes of the image file, or None if the page does not
+             have the expected structure or the file is not readable
+    '''
+    from lxml import etree
+    from calibre.ebooks.oeb.base import XPath, SVG, XLINK
+    root = etree.fromstring(html)
+
+    svg = XPath('//svg:svg')(root)
+    if len(svg) != 1 or len(svg[0]) != 1 or svg[0][0].tag != SVG('image'):
+        return None
+    href = svg[0][0].get(XLINK('href'), None)
+    if not href:
+        # Must be checked before splitting: href is None when the
+        # attribute is missing
+        return None
+    path = os.path.join(base, *href.split('/'))
+    if not os.access(path, os.R_OK):
+        return None
+    with open(path, 'rb') as f:
+        return f.read()
+
+def render_html_svg_workaround(path_to_html, log, width=590, height=750):
+    '''
+    Return cover image data for the HTML file at `path_to_html`.
+
+    If the file contains the SVG namespace, first try to read the image
+    directly via :func:`extract_cover_from_embedded_svg`. If that produces
+    nothing (or fails), fall back to :func:`render_html` and return the
+    `data` attribute of its result.
+
+    :param path_to_html: Path to the HTML file to get a cover from
+    :param log: Callable log object, used to report a failed SVG extraction
+    :param width: Passed through to render_html()
+    :param height: Passed through to render_html()
+    :return: The image data, or None if neither approach produced any
+    '''
+    from calibre.ebooks.oeb.base import SVG_NS
+    with open(path_to_html, 'rb') as f:
+        raw = f.read()
+    data = None
+    if SVG_NS in raw:
+        try:
+            data = extract_cover_from_embedded_svg(raw,
+                   os.path.dirname(path_to_html), log)
+        except Exception, err:
+            # Extraction is best effort only, so fall through to rendering,
+            # but do not hide the reason it failed
+            log('Failed to extract cover from embedded SVG:', err)
+    if data is None:
+        renderer = render_html(path_to_html, width, height)
+        data = getattr(renderer, 'data', None)
+    return data
+
+
 def render_html(path_to_html, width=590, height=750):
    from PyQt4.QtWebKit import QWebPage
    from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@ -54,7 +54,7 @@ class EPUBInput(InputFormatPlugin):
        return False

    @classmethod
-    def rationalize_cover(self, opf):
+    def rationalize_cover(self, opf, log):
        guide_cover, guide_elem = None, None
        for guide_elem in opf.iterguide():
            if guide_elem.get('type', '').lower() == 'cover':
@ -65,28 +65,37 @@ class EPUBInput(InputFormatPlugin):
        spine = list(opf.iterspine())
        if not spine:
            return
+        # Check if the cover specified in the guide is also
+        # the first element in spine
        idref = spine[0].get('idref', '')
        manifest = list(opf.itermanifest())
        if not manifest:
            return
-        if manifest[0].get('id', False) != idref:
+        elem = [x for x in manifest if x.get('id', '') == idref]
+        if not elem or elem[0].get('href', None) != guide_cover:
            return
+        log('Found HTML cover', guide_cover)
+
+        # Remove from spine as covers must be treated
+        # specially
        spine[0].getparent().remove(spine[0])
        guide_elem.set('href', 'calibre_raster_cover.jpg')
+        from calibre.ebooks.oeb.base import OPF
+        t = etree.SubElement(elem[0].getparent(), OPF('item'),
+        href=guide_elem.get('href'), id='calibre_raster_cover')
+        t.set('media-type', 'image/jpeg')
        for elem in list(opf.iterguide()):
            if elem.get('type', '').lower() == 'titlepage':
                elem.getparent().remove(elem)
-        from calibre.ebooks.oeb.base import OPF
        t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
        t.set('type', 'titlepage')
        t.set('href', guide_cover)
        t.set('title', 'Title Page')
-        from calibre.ebooks import render_html
-        renderer = render_html(guide_cover)
+        from calibre.ebooks import render_html_svg_workaround
+        renderer = render_html_svg_workaround(guide_cover, log)
        if renderer is not None:
            open('calibre_raster_cover.jpg', 'wb').write(
-                renderer.data)
-
+                renderer)

    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.utils.zipfile import ZipFile
@ -121,7 +130,7 @@ class EPUBInput(InputFormatPlugin):
            for elem in opf.iterguide():
                elem.set('href', delta+elem.get('href'))

-        self.rationalize_cover(opf)
+        self.rationalize_cover(opf, log)

        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -12,7 +12,7 @@ from urllib import unquote
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.constants import __appname__, __version__
-from calibre import strftime, guess_type
+from calibre import strftime, guess_type, prepare_string_for_xml
 from calibre.customize.conversion import OptionRecommendation

 from lxml import etree
@ -210,6 +210,7 @@ class EPUBOutput(OutputFormatPlugin):
        id, href = self.oeb.manifest.generate('calibre-logo',
                'calibre-logo.png')
        self.oeb.manifest.add(id, href, 'image/png', data=img_data)
+        title, author = map(prepare_string_for_xml, (title, author))
        html = self.TITLEPAGE%dict(title=title, author=author,
                date=strftime('%d %b, %Y'),
                app=__appname__ +' '+__version__,
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@ -5,14 +5,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

 '''Read meta information from epub files'''

-import os, time
+import os
 from cStringIO import StringIO
 from contextlib import closing

-from PyQt4.Qt import QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
-                     SIGNAL, QPainter, QImage, QObject, QApplication, Qt, QPalette
-from PyQt4.QtWebKit import QWebPage
-
 from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre.ebooks.metadata import MetaInformation
@ -102,64 +98,9 @@ class OCFDirReader(OCFReader):
    def open(self, path, *args, **kwargs):
        return open(os.path.join(self.root, path), *args, **kwargs)

-class CoverRenderer(QObject):
-    WIDTH  = 600
-    HEIGHT = 800
-
-    def __init__(self, path):
-        if QApplication.instance() is None:
-            QApplication([])
-        QObject.__init__(self)
-        self.loop = QEventLoop()
-        self.page = QWebPage()
-        pal = self.page.palette()
-        pal.setBrush(QPalette.Background, Qt.white)
-        self.page.setPalette(pal)
-        self.page.setViewportSize(QSize(self.WIDTH, self.HEIGHT))
-        self.page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
-        self.page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
-        QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
-        self._image_data = None
-        self.rendered = False
-        url = QUrl.fromLocalFile(os.path.normpath(path))
-        self.page.mainFrame().load(url)
-
-    def render_html(self, ok):
-        try:
-            if not ok:
-                self.rendered = True
-                return
-            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
-            image.setDotsPerMeterX(96*(100/2.54))
-            image.setDotsPerMeterY(96*(100/2.54))
-            painter = QPainter(image)
-            self.page.mainFrame().render(painter)
-            painter.end()
-            ba = QByteArray()
-            buf = QBuffer(ba)
-            buf.open(QBuffer.WriteOnly)
-            image.save(buf, 'JPEG')
-            self._image_data = str(ba.data())
-        finally:
-            self.loop.exit(0)
-        self.rendered = True
-
-    def image_data():
-        def fget(self):
-            if not self.rendered:
-                self.loop.exec_()
-                count = 0
-                while count < 50 and not self.rendered:
-                    time.sleep(0.1)
-                    count += 1
-            return self._image_data
-        return property(fget=fget)
-    image_data = image_data()
-
-
 def get_cover(opf, opf_path, stream):
-    from calibre.gui2 import is_ok_to_use_qt
-    if not is_ok_to_use_qt(): return None
+    from calibre.ebooks import render_html_svg_workaround
+    from calibre.utils.logging import default_log
    spine = list(opf.spine_items())
    if not spine:
        return
@ -172,8 +113,7 @@ def get_cover(opf, opf_path, stream):
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return
-            cr = CoverRenderer(cpage)
-            return cr.image_data
+            return render_html_svg_workaround(cpage, default_log)

 def get_metadata(stream, extract_cover=True):
    """ Return metadata as a :class:`MetaInformation` object """
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -443,7 +443,7 @@ class MobiReader(object):
            self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
        self.processed_html = self.processed_html.replace('\r\n', '\n')
        self.processed_html = self.processed_html.replace('> <', '>\n<')
-        self.processed_html = re.sub('\x14|\x15|\x1c|\x1d', '', self.processed_html)
+        self.processed_html = re.sub('\x14|\x15|\x1c|\x1d|\xef|\x12|\x13|\xec', '', self.processed_html)

    def ensure_unit(self, raw, unit='px'):
        if re.search(r'\d+$', raw) is not None:
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -1556,7 +1556,8 @@ class MobiWriter(object):
        else:
            raise NotImplementedError("missing date or timestamp needed for mobi_periodical")

-        if oeb.metadata.cover:
+        if oeb.metadata.cover and \
+                unicode(oeb.metadata.cover[0]) in oeb.manifest.ids:
            id = unicode(oeb.metadata.cover[0])
            item = oeb.manifest.ids[id]
            href = item.href
@ -2028,7 +2029,7 @@ class MobiWriter(object):
        indices.write(pack('>H', pos))								# Save the offset for IDXTIndices
        name = "%04X"%count
        indxt.write(chr(len(name)) + name)							# Write the name
-        indxt.write(INDXT['periodical'])						    # entryType [0x0F | 0xDF | 0xFF | 0x3F]
+        indxt.write(INDXT['periodical'])                            # entryType [0x0F | 0xDF | 0xFF | 0x3F]
        indxt.write(chr(1))                                         # subType 1
        indxt.write(decint(offset, DECINT_FORWARD))					# offset
        indxt.write(decint(length, DECINT_FORWARD))					# length
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -759,6 +759,15 @@ class Manifest(object):
            return u'Item(id=%r, href=%r, media_type=%r)' \
                % (self.id, self.href, self.media_type)

+        def _parse_xml(self, data):
+            '''
+            Parse `data` as XML and return the root element.
+
+            If parsing fails with an error whose code is 26 or whose message
+            starts with 'Entity', `data` is run through xml_to_unicode() with
+            resolve_entities=True and parsed again. Any other syntax error
+            is propagated to the caller.
+
+            :raises etree.XMLSyntaxError: if `data` cannot be parsed
+            '''
+            try:
+                return etree.fromstring(data)
+            except etree.XMLSyntaxError, err:
+                entity_error = getattr(err, 'code', 0) == 26 or \
+                        str(err).startswith('Entity')
+                if not entity_error:
+                    # Propagate instead of silently returning None
+                    raise
+                data = xml_to_unicode(data, strip_encoding_pats=True,
+                        resolve_entities=True)[0]
+                return etree.fromstring(data)
+
        def _parse_xhtml(self, data):
            self.oeb.log.debug('Parsing', self.href, '...')
            # Convert to Unicode and normalize line endings
@ -952,7 +961,7 @@ class Manifest(object):
                elif self.media_type.lower() in OEB_DOCS:
                    data = self._parse_xhtml(data)
                elif self.media_type.lower()[-4:] in ('+xml', '/xml'):
-                    data = etree.fromstring(data)
+                    data = self._parse_xml(data)
                elif self.media_type.lower() in OEB_STYLES:
                    data = self._parse_css(data)
                elif 'text' in self.media_type.lower():
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -27,7 +27,6 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
                                    OEBError, OEBBook, DirContainer
 from calibre.ebooks.oeb.writer import OEBWriter
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
-from calibre.ebooks.metadata.epub import CoverRenderer
 from calibre.startup import get_lang
 from calibre.ptempfile import TemporaryDirectory
 from calibre.constants import __appname__, __version__
@ -343,8 +342,11 @@ class OEBReader(object):

            descriptionElement = xpath(child,
                    'descendant::calibre:meta[@name = "description"]')
-            if descriptionElement :
-                description = descriptionElement[0].text
+            if descriptionElement:
+                description = etree.tostring(descriptionElement[0],
+                method='text', encoding=unicode).strip()
+                if not description:
+                    description = None
            else :
                description = None

@ -524,12 +526,14 @@ class OEBReader(object):
        return

    def _cover_from_html(self, hcover):
+        from calibre.ebooks import render_html_svg_workaround
        with TemporaryDirectory('_html_cover') as tdir:
            writer = OEBWriter()
            writer(self.oeb, tdir)
            path = os.path.join(tdir, urlunquote(hcover.href))
-            renderer = CoverRenderer(path)
-            data = renderer.image_data
+            data = render_html_svg_workaround(path, self.logger)
+            if not data:
+                data = ''
        id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
        item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
        return item
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@ -97,6 +97,8 @@ class MergeMetadata(object):
        id = old_cover = None
        if 'cover' in self.oeb.guide:
            old_cover = self.oeb.guide['cover']
+        if prefer_metadata_cover and old_cover is not None:
+            cdata = ''
        if cdata:
            self.oeb.guide.remove('cover')
            self.oeb.guide.remove('titlepage')
@ -106,6 +108,10 @@ class MergeMetadata(object):
                if not cdata:
                    return item.id
                self.oeb.manifest.remove(item)
+            elif not cdata:
+                id = self.oeb.manifest.generate(id='cover')
+                self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
+                return id
        if cdata:
            id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
            self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata)
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -301,30 +301,26 @@ class FlowSplitter(object):

        # Tree 1
        hit_split_point = False
-        for elem in list(body.iterdescendants(etree.Element)):
+        for elem in list(body.iterdescendants()):
            if elem is split_point:
                hit_split_point = True
                if before:
-                    x = elem.get('id', None)
                    nix_element(elem)

                continue
            if hit_split_point:
-                x = elem.get('id', None)
                nix_element(elem)


        # Tree 2
        hit_split_point = False
-        for elem in list(body2.iterdescendants(etree.Element)):
+        for elem in list(body2.iterdescendants()):
            if elem is split_point2:
                hit_split_point = True
                if not before:
-                    x = elem.get('id', None)
                    nix_element(elem, top=False)
                continue
            if not hit_split_point:
-                x = elem.get('id', None)
                nix_element(elem, top=False)
        body2.text = '\n'

--- a/src/calibre/gui2/dialogs/metadata_bulk.ui
+++ b/src/calibre/gui2/dialogs/metadata_bulk.ui
@ -53,7 +53,7 @@
          <item row="2" column="0">
           <widget class="QLabel" name="label_8">
            <property name="text">
-             <string>Author S&amp;ort: </string>
+             <string>Author s&amp;ort: </string>
            </property>
            <property name="alignment">
             <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
@ -118,7 +118,7 @@
          <item row="5" column="0">
           <widget class="QLabel" name="label_4">
            <property name="text">
-             <string>Add Ta&amp;gs: </string>
+             <string>Add ta&amp;gs: </string>
            </property>
            <property name="alignment">
             <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -392,7 +392,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            self.tags.update_tags_cache(self.db.all_tags())

    def fetch_cover(self):
-        isbn   = unicode(self.isbn.text()).strip()
+        isbn   = re.sub(r'[^0-9a-zA-Z]', '', unicode(self.isbn.text())).strip()
        self.fetch_cover_button.setEnabled(False)
        self.setCursor(Qt.WaitCursor)
        title, author = map(unicode, (self.title.text(), self.authors.text()))
@ -510,7 +510,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        aus = qstring_to_unicode(self.author_sort.text())
        if aus:
            self.db.set_author_sort(self.id, aus, notify=False)
-        self.db.set_isbn(self.id, qstring_to_unicode(self.isbn.text()), notify=False)
+        self.db.set_isbn(self.id,
+                 re.sub(r'[^0-9a-zA-Z]', '', unicode(self.isbn.text())), notify=False)
        self.db.set_rating(self.id, 2*self.rating.value(), notify=False)
        self.db.set_publisher(self.id, qstring_to_unicode(self.publisher.currentText()), notify=False)
        self.db.set_tags(self.id, qstring_to_unicode(self.tags.text()).split(','), notify=False)
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -1873,13 +1873,19 @@ def main(args=sys.argv):
                return run_gui(opts, args, actions, listener, app)
        else:
            return run_gui(opts, args, actions, listener, app)
+    otherinstance = False
    try:
        listener = Listener(address=ADDRESS)
-    except socket.error: # Good si is correct
-        communicate(args)
+    except socket.error: # Good si is correct (on UNIX)
+        otherinstance = True
    else:
+        # On windows only singleinstance can be trusted
+        otherinstance = True if iswindows else False
+    if not otherinstance:
        return run_gui(opts, args, actions, listener, app)

+    communicate(args)
+
    return 0


--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -20,8 +20,8 @@ What formats does |app| support conversion to/from?
 |app| supports the conversion of many input formats to many output formats.
 It can convert every input format in the following list, to every output format.

-*Input Formats:* CBZ, CBR, CBC, EPUB, FB2, HTML, LIT, MOBI, ODT, PDF, PRC**, RTF, TXT
-*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PDF, TXT
+*Input Formats:* CBZ, CBR, CBC, EPUB, FB2, HTML, LIT, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, TXT
+*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, TXT

 ** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers

--- a/src/calibre/trac/plugins/Changelog.py
+++ b/src/calibre/trac/plugins/Changelog.py
@ -1,7 +1,7 @@
 '''
 Trac Macro to generate an end use Changelog from the svn logs.
 '''
-import re, collections, time
+import re, collections, time, os

 from bzrlib import log as blog, branch

@ -12,48 +12,55 @@ from trac.wiki.macros import WikiMacroBase
 from trac.util import Markup


-BZR_PATH = '/var/bzr/code/calibre/trunk'
+BZR_PATH = '/usr/local/calibre'

 class ChangelogFormatter(blog.LogFormatter):
-    
+
    supports_tags = True
    supports_merge_revisions = False
-    
+    _show_advice = False
+
    def __init__(self, num_of_versions=20):
        self.num_of_versions = num_of_versions
        self.messages = collections.deque()
        self.entries = []
-        self.current_entry = None 
-    
+        self.current_entry = None
+
    def log_revision(self, r):
        if len(self.entries) > self.num_of_versions-1:
            return
        msg = r.rev.message
        match = re.match(r'version\s+(\d+\.\d+.\d+)', msg)
-         
+
        if match:
            if self.current_entry is not None:
                self.entries.append((self.current_entry, set(self.messages)))
            timestamp = r.rev.timezone + r.rev.timestamp
            self.current_entry = match.group(1) + time.strftime(' (%d %b, %Y)', time.gmtime(timestamp))
            self.messages = collections.deque()
-            
+
        else:
            if re.search(r'[a-zA-Z]', msg) and len(msg.strip()) > 5:
                if 'translation' not in msg and not msg.startswith('IGN'):
                    self.messages.append(msg.strip())
-                    
+
    def to_wiki_txt(self):
        txt = ['= Changelog =\n[[PageOutline]]']
        for entry in self.entries:
            txt.append(u'----\n== Version '+entry[0]+' ==')
-            for msg in entry[1]:
-                txt.append(u'  * ' + msg)
-                
+            if entry[0] == '0.6.0':
+                txt.append(u'For a list of new features in 0.6.0 see http://calibre.kovidgoyal.net/new_in_6')
+            else:
+                for msg in entry[1]:
+                    txt.append(u'  * ' + msg)
+
        return u'\n'.join(txt)
-    
+
 def bzr_log_to_txt():
-    b = branch.Branch.open(BZR_PATH)
+    path = BZR_PATH
+    if not os.path.exists(path):
+        path = '/home/kovid/work/calibre'
+    b = branch.Branch.open(path)
    lf = ChangelogFormatter()
    blog.show_log(b, lf)
    return lf.to_wiki_txt()
@ -68,6 +75,6 @@ class ChangeLogMacro(WikiMacroBase):


 if __name__ == '__main__':
-    print bzr_log_to_txt()
-        
-        
+    print bzr_log_to_txt().encode('utf-8')
+
+
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
@ -4,9 +4,9 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: calibre 0.6.0b14\n"
-"POT-Creation-Date: 2009-07-19 12:31+MDT\n"
-"PO-Revision-Date: 2009-07-19 12:31+MDT\n"
+"Project-Id-Version: calibre 0.6.0b16\n"
+"POT-Creation-Date: 2009-07-22 07:39+MDT\n"
+"PO-Revision-Date: 2009-07-22 07:39+MDT\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@ -69,8 +69,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:136
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:138
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:84
-#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:101
-#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:102
+#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:103
+#: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ereader/writer.py:104
 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/input.py:26
 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/palmdoc/writer.py:29
 #: /home/kovid/work/calibre/src/calibre/ebooks/pdb/ztxt/writer.py:27
@ -107,7 +107,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf.py:48
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:106
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/fetch_metadata.py:139
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:345
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:348
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:34
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:39
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler.py:40
@ -126,8 +126,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1430
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1514
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1599
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1622
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1673
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1621
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1672
 #: /home/kovid/work/calibre/src/calibre/library/server.py:294
 #: /home/kovid/work/calibre/src/calibre/library/server.py:355
 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:45
@ -1384,6 +1384,11 @@ msgid ""
 "Fetch a cover image for the book identified by ISBN from LibraryThing.com\n"
 msgstr ""

+#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1053
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
+msgid "Cover"
+msgstr ""
+
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:22
 msgid "Modify images to meet Palm device size limitations."
 msgstr ""
@ -1405,14 +1410,10 @@ msgstr ""
 msgid "Disable compression of the file contents."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:101
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:103
 msgid "All articles"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
-msgid "Cover"
-msgstr ""
-
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1261
 msgid "Title Page"
 msgstr ""
@ -3662,50 +3663,50 @@ msgstr ""
 msgid "The cover in the %s format is invalid"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:402
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:405
 msgid "Downloading cover..."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:414
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:419
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:425
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:417
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:422
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:428
 msgid "Cannot fetch cover"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:415
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:426
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:418
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:429
 msgid "<b>Could not fetch cover.</b><br/>"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:416
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:419
 msgid "The download timed out."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:420
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:423
 msgid "Could not find cover for this book. Try specifying the ISBN first."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:432
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:435
 msgid "Bad cover"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:433
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:436
 msgid "The cover is not a valid picture"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:472
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:475
 msgid "Cannot fetch metadata"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:473
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:476
 msgid "You must specify at least one of ISBN, Title, Authors or Publisher"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:499
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:502
 msgid "Permission denied"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:500
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single.py:503
 msgid "Could not open %s. Is it being used by another program?"
 msgstr ""

--- a/src/calibre/utils/logging.py
+++ b/src/calibre/utils/logging.py
@ -102,3 +102,5 @@ class Log(object):

    def __call__(self, *args, **kwargs):
        self.prints(INFO, *args, **kwargs)
+
+default_log = Log()
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -52,7 +52,7 @@ recipe_modules = ['recipe_' + r for r in (
           'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
           'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',
           'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',
-           'eltiempo_hn',
+           'eltiempo_hn', 'slate',
          )]


--- a/src/calibre/web/feeds/recipes/recipe_al_jazeera.py
+++ b/src/calibre/web/feeds/recipes/recipe_al_jazeera.py
@ -12,26 +12,27 @@ class AlJazeera(BasicNewsRecipe):
    title                  = 'Al Jazeera in English'
    __author__             = 'Darko Miletic'
    description            = 'News from Middle East'
+    language               = _('English')
    publisher              = 'Al Jazeera'
    category               = 'news, politics, middle east'
    simultaneous_downloads = 1
-    delay                  = 4    
+    delay                  = 4
    oldest_article         = 1
    max_articles_per_feed  = 100
    no_stylesheets         = True
    encoding               = 'iso-8859-1'
    remove_javascript      = True
    use_embedded_content   = False
-    
+
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True' 
-     
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
+
    keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]

    remove_tags = [
--- a/src/calibre/web/feeds/recipes/recipe_azstarnet.py
+++ b/src/calibre/web/feeds/recipes/recipe_azstarnet.py
@ -12,9 +12,10 @@ class Azstarnet(BasicNewsRecipe):
    title                 = 'Arizona  Daily Star'
    __author__            = 'Darko Miletic'
    description           = 'news from Arizona'
+    language              = _('English')
    publisher             = 'azstarnet.com'
    category              = 'news, politics, Arizona, USA'
-    delay                 = 1    
+    delay                 = 1
    oldest_article        = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
@ -28,8 +29,8 @@ class Azstarnet(BasicNewsRecipe):
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -40,8 +41,8 @@ class Azstarnet(BasicNewsRecipe):
            br['pass' ] = self.password
            br.submit()
        return br
-    
-    
+
+
    keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]

    remove_tags = [
@ -49,15 +50,15 @@ class Azstarnet(BasicNewsRecipe):
                    ,dict(name='div',attrs={'class':'bannerinstory'})
                  ]

-    
+
    feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')]

    def preprocess_html(self, soup):
        soup.html['dir' ] = 'ltr'
        soup.html['lang'] = 'en-US'
        mtag = '\n<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
-        soup.head.insert(0,mtag)    
+        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
-            del item['style']    
+            del item['style']
        return soup
-    
+
--- a/src/calibre/web/feeds/recipes/recipe_coding_horror.py
+++ b/src/calibre/web/feeds/recipes/recipe_coding_horror.py
@ -14,6 +14,7 @@ class CodingHorror(BasicNewsRecipe):
    description           = 'programming and human factors - Jeff Atwood'
    category              = 'blog, programming'
    publisher             = 'Jeff Atwood'
+    language = _('English')
    author                = 'Jeff Atwood'
    oldest_article        = 30
    max_articles_per_feed = 100
--- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py
+++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py
@ -16,6 +16,7 @@ class Sueddeutsche(BasicNewsRecipe):
    use_embedded_content   = False
    timefmt = ' [%a %d %b %Y]'
    max_articles_per_feed = 50
+    language = _('English')
    no_stylesheets = True
    html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
    html2lrf_options = ['--ignore-tables']
--- a/src/calibre/web/feeds/recipes/recipe_moneynews.py
+++ b/src/calibre/web/feeds/recipes/recipe_moneynews.py
@ -11,25 +11,26 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class MoneyNews(BasicNewsRecipe):
    title                 = 'Moneynews.com'
    __author__            = 'Darko Miletic'
-    description           = 'Financial news worldwide'  
+    description           = 'Financial news worldwide'
    publisher             = 'moneynews.com'
-    category              = 'news, finances, USA, business'    
+    language              = _('English')
+    category              = 'news, finances, USA, business'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
-    
+
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
-                            
-    feeds = [ 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+
+    feeds = [
              (u'Street Talk'          , u'http://moneynews.newsmax.com/xml/streettalk.xml'  )
             ,(u'Finance News'         , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
             ,(u'Economy'              , u'http://moneynews.newsmax.com/xml/economy.xml'     )
@ -38,12 +39,12 @@ class MoneyNews(BasicNewsRecipe):
             ,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml'   )
            ]

-            
+
    keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
-    
+
    remove_tags = [
                     dict(name='td'   , attrs={'id':'article_fontsize'})
                    ,dict(name='table', attrs={'id':'toolbox'         })
                    ,dict(name='tr'   , attrs={'id':'noprint3'        })
                  ]
-                
+
--- a/src/calibre/web/feeds/recipes/recipe_publico.py
+++ b/src/calibre/web/feeds/recipes/recipe_publico.py
@ -17,7 +17,7 @@ class Publico(BasicNewsRecipe):
    max_articles_per_feed = 30
    encoding='utf-8'
    no_stylesheets = True
-    language = _('Portuguese')
+    language = _('Portugese')
    preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE),  lambda match: ''),]

    feeds          = [
--- a/src/calibre/web/feeds/recipes/recipe_scott_hanselman.py
+++ b/src/calibre/web/feeds/recipes/recipe_scott_hanselman.py
@ -15,6 +15,7 @@ class ScottHanselman(BasicNewsRecipe):
    category              = "Scott, Computer, Zen, .NET, C#, Hanselman, Scott, Weblog, Diabetes, Portland, Zimbabwe, ComputerZen.com - Scott Hanselman's Musings"
    publisher             = 'Scott Hanselman'
    author                = 'Scott Hanselman'
+    language              = _('English')
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
--- a/src/calibre/web/feeds/recipes/recipe_slate.py
+++ b/src/calibre/web/feeds/recipes/recipe_slate.py
@ -0,0 +1,330 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+'''
+Fetches the last 7 days of featured articles from slate.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
+
+class Slate(BasicNewsRecipe):
+    '''
+    Recipe for slate.com.
+
+    No RSS feeds are configured (feeds is None); the article index is built
+    by parse_index, which scrapes the table of contents of the page at
+    baseURL.
+    '''
+    # Method variables for customizing downloads
+    title                   = 'Slate'
+    description             = 'A daily magazine on the Web, offering analysis and commentary about politics, news and culture.'
+    __author__              = 'GRiker@hotmail.com'
+    language                = _('English')
+    max_articles_per_feed   = 40
+    oldest_article          = 7.0
+    recursions              = 0
+    delay                   = 0
+    simultaneous_downloads  = 5
+    timeout                 = 120.0
+    timefmt                 = ''
+    feeds                   = None
+    no_stylesheets          = True
+    encoding                = None
+
+    # Method variables for customizing feed parsing
+    summary_length          = 250
+    use_embedded_content    = None
+
+    # Method variables for pre/post processing of HTML
+    remove_tags             = [ dict(name=['link','style']),
+                                dict(id=['toolbox','site_navigation','article_bottom_tools_cntr',
+                                         'article_bottom_tools','recommend_tab2','bottom_sponsored_links',
+                                         'fray_article_discussion','bizbox_sponsored_links_bottom',
+                                         'page_rightcol','top_banner','also_in_slate_bottom','articlefooter',
+                                         'article_top_wedge','content-top','page-title',
+                                         'block-today039s-business-press-archives','block-blog-roll',
+                                         'block-also-in-tbm','block-most-popular-on-tbm','block-the-best-of-tbm',
+                                         'service-links-bottom','comments','ft']),
+                                dict(attrs={'class':['fray_article_links','clearing','nav',
+                                            'service-links service-links-stack','yui-b last',
+                                            'read-more-comments']})]
+    extra_css = '.headline  {text-align:left;}\n\
+                 .byline    {font:monospace; text-align:left; margin-bottom:0pt;}\n\
+                 .dateline  {text-align:left; height:0pt;}\n\
+                 .source    {align:left;}\n\
+                 .credit    {text-align:right;font-size:smaller;}\n'
+
+    # Front page that extract_sections downloads to find the article lists
+    baseURL = 'http://slate.com'
+    # Date labels of the front-page sections, indexed by <ul> position;
+    # populated by extract_sections and read by extract_section_articles
+    section_dates = []
+
+    def tag_to_strings(self, tag):
+        '''
+        Return the text fragments of ``tag`` as a list, one per direct child.
+
+        String children (NavigableString/CData) contribute their text; Tag
+        children contribute the result of self.tag_to_string when that is
+        non-empty. Any other child type is ignored.
+
+        A falsy ``tag`` (e.g. None) gives an empty list and a plain string
+        gives a one-element list, so callers always receive a list and can
+        rely on len() and on indexing by fragment.
+        '''
+        if not tag:
+            return []
+        if isinstance(tag, basestring):
+            return [tag]
+        strings = []
+        for item in tag.contents:
+            if isinstance(item, (NavigableString, CData)):
+                strings.append(item.string)
+            elif isinstance(item, Tag):
+                res = self.tag_to_string(item)
+                if res:
+                    strings.append(res)
+        return strings
+
+    def extract_sections(self):
+        '''
+        Download the page at self.baseURL and return its section lists.
+
+        Side effect: self.section_dates is rebuilt from scratch with the
+        text of the 'todaydateline' element (stored twice) followed by the
+        text of every 'maindateline' element, in document order.
+
+        Returns a list of the <ul> tags found inside the element with id
+        'toc_links_container'; the <ul> of the top-stories block is inserted
+        at the start of the first of them.
+        '''
+        soup = self.index_to_soup( self.baseURL )
+
+        soup_top_stories = soup.find(True, attrs={'class':'tap2_topic entry-content'})
+        toc = soup.find(True, attrs={'id':'toc_links_container'})
+
+        todays_section = toc.find(True, attrs={'class':'todaydateline'})
+        today = self.tag_to_string(todays_section,use_alt=False)
+        # Today's label is stored twice. NOTE(review): presumably because the
+        # headline <ul> nested into the first section below counts as an
+        # extra <ul> when extract_section_articles looks labels up by
+        # position - confirm against the live page.
+        section_dates = [today, today]
+        for older_section in toc.findAll(True, attrs={'class':'maindateline'}) :
+            section_dates.append(self.tag_to_string(older_section,use_alt=False))
+
+        # Assign a fresh list instead of appending to the class-level one,
+        # so labels do not accumulate across calls or leak between instances
+        self.section_dates = section_dates
+
+        headline_stories = soup_top_stories.find('ul')
+        section_lists = toc.findAll('ul')
+        # Prepend the headlines to the first section
+        section_lists[0].insert(0,headline_stories)
+
+        return list(section_lists)
+
+
+    def extract_section_articles(self, sections_html) :
+        '''
+        Build the per-section article lists from the front-page section lists.
+
+        ``sections_html`` is converted with str() and re-parsed; every <ul>
+        that has an 'id' attribute is treated as a section whose name is
+        self.section_dates[position of the <ul>]. Each <li> of a section
+        becomes a dict with the keys title, url, date, description, author
+        and content.
+
+        Articles are dropped when their description, full title or author
+        matches one of the exclusion keywords, or when their URL is already
+        listed in the same section. Within a section, entries whose
+        description mentions 'newspapers' are moved to the front.
+
+        Returns a list of (section_name, articles) tuples after passing it
+        through self.remove_duplicates.
+        '''
+        soup = self.index_to_soup(str(sections_html))
+        sections = soup.findAll('ul')
+        articles = {}
+        ans = []
+
+        # The exclusion filters never change, so compile them once up front
+        # instead of once per article
+        excluded_description = re.compile('|'.join(['Slate V','Twitter feed','podcast']))
+        excluded_title = re.compile('|'.join(['Gabfest','Slate V']))
+        excluded_author = re.compile('|'.join(['Prudence']))
+        newspapers_summary = "A summary of what's in the major U.S. newspapers."
+
+        for (i,section) in enumerate(sections) :
+
+            # Only lists carrying an id are sections; name them after the
+            # date label stored at the same position
+            if not section.has_key('id') :
+                continue
+            key = self.section_dates[i]
+            articles[key] = []
+            ans.append(key)
+
+            # Extract the article attributes
+            for article in section.findAll('li') :
+                bylines = self.tag_to_strings(article)
+                url = article.a['href']
+                title = bylines[0]
+                full_title = self.tag_to_string(article)
+
+                author = None
+                description = None
+                pubdate = None
+
+                if len(bylines) == 2 and full_title.find("Today's Papers") > 0 :
+                    description = newspapers_summary
+
+                if len(bylines) == 3 :
+                    author = bylines[2].strip()
+                    author = re.sub('[\r][\n][\t][\t\t]','', author)
+                    author = re.sub(',','', author)
+                    if bylines[1] is not None :
+                        description = bylines[1]
+                        if full_title.find('major U.S. newspapers') > 0 :
+                            description = newspapers_summary
+
+                # NOTE(review): author is only assigned when there are exactly
+                # three fragments, so this branch cannot run as written.
+                # Joining avoids the trailing ' | ' the index test used to leave.
+                if len(bylines) > 3  and author is not None:
+                    author += " | " + " | ".join(s.strip() for s in bylines[3:])
+
+                # Skip articles whose descriptions contain excluded keywords
+                if description is not None and excluded_description.search(description) :
+                    continue
+
+                # Skip articles whose title contain excluded keywords
+                if full_title is not None and excluded_title.search(full_title) :
+                    continue
+
+                # Skip articles whose author contain excluded keywords
+                if author is not None and excluded_author.search(author) :
+                    continue
+
+                # Check to make sure we're not adding a duplicate
+                if any(known['url'] == url for known in articles[key]) :
+                    continue
+
+                # Build the dictionary entry for this article
+                articles[key].append(dict(title=title, url=url, date=pubdate, description=description,
+                                           author=author, content=''))
+
+            # Promote 'newspapers' to top. Index by key: it is always bound
+            # here, even when every article of the section was skipped.
+            for (j,entry) in enumerate(articles[key]) :
+                if entry['description'] is not None :
+                    if entry['description'].find('newspapers') > 0 :
+                        articles[key].insert(0,articles[key].pop(j))
+
+
+        ans = [(key, articles[key]) for key in ans if key in articles]
+        ans = self.remove_duplicates(ans)
+        return ans
+
+    def flatten_document(self, ans):
+        '''
+        Merge every section of ``ans`` into a single 'All Articles' section.
+
+        ``ans`` is a sequence of (name, articles) pairs. The result is a
+        one-element list holding ['All Articles', combined], where combined
+        contains the articles of all sections in their original order.
+        '''
+        combined = []
+        for section in ans :
+            combined.extend(section[1])
+
+        return [['All Articles', combined]]
+
+    def remove_duplicates(self, ans):
+        '''
+        Remove articles whose URL already appeared in an earlier section.
+
+        ``ans`` is a sequence of (name, articles) pairs where each article is
+        a dict with a 'url' key. The article lists are filtered in place and
+        ``ans`` itself is returned. Repeats inside a single section are left
+        untouched; only repeats of an earlier section's URL are removed.
+        '''
+        seen_urls = set()
+        for section in ans :
+            entries = section[1]
+            # Build the filtered list first and assign it through a slice:
+            # deleting while enumerating skips the element after each
+            # removal, and the slice keeps the caller's list object.
+            entries[:] = [entry for entry in entries if entry['url'] not in seen_urls]
+            seen_urls.update(entry['url'] for entry in entries)
+        return ans
+
+    def print_version(self, url) :
+        '''
+        Return the address to download for the article at ``url``: the same
+        URL with 'pagenum/all/' appended.
+        '''
+        all_pages_suffix = 'pagenum/all/'
+        return url + all_pages_suffix
+
+    # Class methods
+    def parse_index(self) :
+        '''
+        Build the article index: extract the front-page sections, turn them
+        into per-section article lists, then flatten those into the single
+        section returned by flatten_document.
+        '''
+        per_section = self.extract_section_articles(self.extract_sections())
+        return self.flatten_document(per_section)
+
+
+    def postprocess_html(self, soup, first_fetch) :
+        '''
+        Restructure the header elements of a downloaded article in place.
+
+        - the 'department_kicker' element becomes <h3><em>text</em></h3>
+        - the first <h1> becomes <h2 class="headline">, with its text
+          fragments separated by '<br />'
+        - the first 'byline' and 'dateline' elements become plain <div>s
+          holding only their text
+        - every 'caption' element becomes an <em> holding '<br />' plus its
+          text, followed by an <hr>
+
+        ``first_fetch`` is not used. Returns the modified ``soup``.
+        '''
+        # Fix up dept_kicker as <h3><em>
+        dept_kicker = soup.find(True, attrs={'class':'department_kicker'})
+        if dept_kicker is not None :
+            kicker_strings = self.tag_to_strings(dept_kicker)
+            if len(kicker_strings) > 3 :
+                kicker = kicker_strings[2] + kicker_strings[3]
+            else :
+                # Fewer fragments than expected: keep whatever text there is
+                # instead of failing with an IndexError
+                kicker = ''.join(kicker_strings)
+            # Drop literal periods only. As a regular expression, '.' matches
+            # every character and would wipe out the whole kicker.
+            kicker = kicker.replace('.','')
+            h3Tag = Tag(soup, "h3")
+            emTag = Tag(soup, "em")
+            h3Tag.insert(0, emTag)
+            emTag.insert(0,kicker)
+            dept_kicker.replaceWith(h3Tag)
+
+        # Change <h1> to <h2>
+        headline = soup.find("h1")
+        if headline is not None :
+            h2tag = Tag(soup, "h2")
+            h2tag['class'] = "headline"
+            h2tag.insert(0, '<br />'.join(self.tag_to_strings(headline)))
+            headline.replaceWith(h2tag)
+
+        # Fix up the concatenated byline and dateline
+        byline = soup.find(True,attrs={'class':'byline'})
+        if byline is not None :
+            bylineTag = Tag(soup,'div')
+            bylineTag['class'] = 'byline'
+            bylineTag.insert(0,self.tag_to_string(byline))
+            byline.replaceWith(bylineTag)
+
+        dateline = soup.find(True, attrs={'class':'dateline'})
+        if dateline is not None :
+            datelineTag = Tag(soup, 'div')
+            datelineTag['class'] = 'dateline'
+            datelineTag.insert(0,self.tag_to_string(dateline))
+            dateline.replaceWith(datelineTag)
+
+        # Change captions to italic, add <hr>
+        for caption in soup.findAll(True, {'class':'caption'}) :
+            emTag = Tag(soup, "em")
+            emTag.insert(0, '<br />' + self.tag_to_string(caption))
+            hrTag = Tag(soup, 'hr')
+            emTag.insert(1, hrTag)
+            caption.replaceWith(emTag)
+
+        return soup
+
+    def postprocess_book(self, oeb, opts, log) :
+        '''
+        Fill in the author and description of table-of-contents entries that
+        lack them, using the text of the article each entry points to.
+
+        Entries are visited directly when oeb.toc.depth() is 2 and one level
+        down (section, then article) when it is 3; any other depth leaves
+        the table of contents untouched. ``opts`` and ``log`` are not used.
+        '''
+
+        def parse(href) :
+            # Parse the manifest item behind href
+            return BeautifulSoup(str(oeb.manifest.hrefs[href]))
+
+        def extract_byline(href) :
+            # Text of the first element with class 'byline', or None
+            byline = parse(href).find(True,attrs={'class':'byline'})
+            if byline is None :
+                return None
+            return self.tag_to_string(byline,use_alt=False)
+
+        def extract_description(href) :
+            # First 200 characters (plus '...') of the first paragraph that
+            # does not start with 'By ' or 'Posted ', or None
+            for p in parse(href).findAll('p') :
+                text = self.tag_to_string(p,use_alt=False)
+                if text.startswith('By ') or text.startswith('Posted ') :
+                    continue
+
+                # Take image wrappers out before reading the paragraph text
+                for image in p.findAll(True, attrs={'class':'imagewrapper'}) :
+                    image.extract()
+                return self.tag_to_string(p,use_alt=False)[:200] + '...'
+
+            return None
+
+        def fill_missing(article) :
+            # Only fields that are still None are filled in
+            if article.author is None :
+                article.author = extract_byline(article.href)
+
+            if article.description is None :
+                article.description = extract_description(article.href)
+
+        if oeb.toc.depth() == 2 :
+            for article in oeb.toc :
+                fill_missing(article)
+
+        elif oeb.toc.depth() == 3 :
+            for section in oeb.toc :
+                for article in section :
+                    fill_missing(article)
+
+
+
--- a/src/calibre/web/feeds/recipes/recipe_stackoverflow.py
+++ b/src/calibre/web/feeds/recipes/recipe_stackoverflow.py
@ -15,6 +15,7 @@ class StackOverflowBlog(BasicNewsRecipe):
    category              = 'blog, programming'
    publisher             = 'StackOverflow team'
    oldest_article        = 30
+    language = _('English')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = True
--- a/src/calibre/web/feeds/recipes/recipe_wired.py
+++ b/src/calibre/web/feeds/recipes/recipe_wired.py
@ -2,36 +2,25 @@
 __license__   = 'GPL v3'
 __docformat__ = 'restructuredtext en'

-import re

 from calibre.web.feeds.news import BasicNewsRecipe

 class Wired(BasicNewsRecipe):
-    
+
    title = 'Wired.com'
-    __author__ = 'David Chen <SonyReader<at>DaveChen<dot>org>'
+    __author__ = 'Kovid Goyal'
    description = 'Technology news'
    timefmt  = ' [%Y%b%d  %H%M]'
    language = _('English')
    no_stylesheets = True
-    #html2lrf_options = ['--base-font-size', '16']
-    
-    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in 
-  
-                [
-  
-                ## Remove any banners/links/ads/cruft before the body of the article.
-                (r'<body.*?((<div id="article_body">)|(<div id="st-page-maincontent">)|(<div id="containermain">)|(<p class="ap-story-p">)|(<!-- img_nav -->))', lambda match: '<body><div>'),
-  
-                ## Remove any links/ads/comments/cruft from the end of the body of the article.
-                (r'((<!-- end article content -->)|(<div id="st-custom-afterpagecontent">)|(<p class="ap-story-p">&copy;)|(<div class="entry-footer">)|(<div id="see_also">)|(<p>Via <a href=)|(<div id="ss_nav">)).*?</html>', lambda match : '</div></body></html>'),
-  
-                ## Correctly embed in-line images by removing the surrounding javascript that will be ignored in the conversion
-                (r'<a.*?onclick.*?>.*?(<img .*?>)', lambda match: match.group(1),),
-                
-                ]
-            ]
-    
+
+    remove_tags_before = dict(name='div', id='content')
+    remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
+        'footer', 'advertisement', 'blog_subscription_unit',
+        'brightcove_component']),
+        {'class':'entryActions'},
+        dict(name=['noscript', 'script'])]
+
    feeds = [
        ('Top News', 'http://feeds.wired.com/wired/index'),
        ('Culture', 'http://feeds.wired.com/wired/culture'),
@ -47,8 +36,8 @@ class Wired(BasicNewsRecipe):
        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
        ]
-    
+
    def print_version(self, url):
        return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
-    
-    
+
+