Sync to trunk.

2026-05-20 22:12:37 -04:00 · 2009-07-06 19:14:58 -04:00
parent a2457cb939 4f1c599581
commit 0e8cc35f18
48 changed files with 22137 additions and 20673 deletions
@@ -250,6 +250,19 @@ class KindleDXOutput(OutputProfile):
    def tags_to_string(cls, tags):
        return 'ttt '.join(tags)+'ttt '

+class IlliadOutput(OutputProfile):
+
+    # Output profile for the Irex Illiad e-book reader (see ``description``).
+    # NOTE(review): the device name is commonly written "iLiad"; the spelling
+    # below is user-visible and ``short_name`` may be referenced elsewhere,
+    # so confirm before changing it.
+    name        = 'Illiad'
+    short_name  = 'illiad'
+    description = _('This profile is intended for the Irex Illiad.')
+
+    # presumably (width, height) in pixels -- TODO confirm against OutputProfile
+    screen_size               = (760, 925)
+    # Same dimensions as screen_size are used for comics on this device.
+    comic_screen_size         = (760, 925)
+    dpi                       = 160.0
+    # presumably the base font size and the ladder of available font sizes,
+    # in points -- TODO confirm against OutputProfile
+    fbase                     = 12
+    fsizes                    = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
+
+

 output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
        MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
@@ -17,6 +17,34 @@ from calibre.customize.conversion import OptionRecommendation

 from lxml import etree

+# Bare (namespace-less) names of HTML elements treated as block-level.
+# EPUBOutput.workaround_ade_quirks() checks the tag preceding a replaced <br>
+# against this tuple to decide whether the replacement gets a 1em height.
+block_level_tags = (
+      'address',
+      'body',
+      'blockquote',
+      'center',
+      'dir',
+      'div',
+      'dl',
+      'fieldset',
+      'form',
+      'h1',
+      'h2',
+      'h3',
+      'h4',
+      'h5',
+      'h6',
+      'hr',
+      'isindex',
+      'menu',
+      'noframes',
+      'noscript',
+      'ol',
+      'p',
+      'pre',
+      'table',
+      'ul',
+      )
+

 class EPUBOutput(OutputFormatPlugin):

@@ -197,8 +225,6 @@ class EPUBOutput(OutputFormatPlugin):
            if 'titlepage' in self.oeb.guide.refs:
                self.oeb.guide.refs['titlepage'].href = item.href

-
-
    def condense_ncx(self, ncx_path):
        if not self.opts.pretty_print:
            tree = etree.parse(ncx_path)
@@ -210,46 +236,46 @@ class EPUBOutput(OutputFormatPlugin):
            compressed = etree.tostring(tree.getroot(), encoding='utf-8')
            open(ncx_path, 'wb').write(compressed)

-
-
    def workaround_ade_quirks(self):
        '''
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
-        from calibre.ebooks.oeb.base import XPNSMAP, XHTML, OEB_STYLES
-        from lxml.etree import XPath as _XPath
-        from functools import partial
-        XPath = partial(_XPath, namespaces=XPNSMAP)
+        from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename

        for x in self.oeb.spine:
            root = x.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]
+
            # Replace <br> that are children of <body> as ADE doesn't handle them
            if hasattr(body, 'xpath'):
                for br in XPath('./h:br')(body):
                    if br.getparent() is None:
                        continue
                    try:
-                        sibling = br.itersiblings().next()
+                        prior = br.itersiblings(preceding=True).next()
+                        priortag = barename(prior.tag)
+                        priortext = prior.tail
                    except:
-                        sibling = None
+                        priortag = 'body'
+                        priortext = body.text
+                    if priortext:
+                        priortext = priortext.strip()
                    br.tag = XHTML('p')
                    br.text = u'\u00a0'
-                    if (br.tail and br.tail.strip()) or sibling is None or \
-                    getattr(sibling, 'tag', '') != XHTML('br'):
-                        style = br.get('style', '').split(';')
-                        style = filter(None, map(lambda x: x.strip(), style))
-                        style.append('margin: 0pt; border:0pt; height:0pt')
-                        br.set('style', '; '.join(style))
+                    style = br.get('style', '').split(';')
+                    style = filter(None, map(lambda x: x.strip(), style))
+                    style.append('margin:0pt; border:0pt')
+                    # If the prior tag is a block (including a <br> we replaced)
+                    # then this <br> replacement should have a 1-line height.
+                    # Otherwise it should have no height.
+                    if not priortext and priortag in block_level_tags:
+                        style.append('height:1em')
                    else:
-                        sibling.getparent().remove(sibling)
-                        if sibling.tail:
-                            if not br.tail:
-                                br.tail = ''
-                            br.tail += sibling.tail
+                        style.append('height:0pt')
+                    br.set('style', '; '.join(style))

            for tag in XPath('//h:embed')(root):
                tag.getparent().remove(tag)
@@ -218,7 +218,7 @@ class MetaInformation(object):
                     'isbn', 'tags', 'cover_data', 'application_id', 'guide',
                     'manifest', 'spine', 'toc', 'cover', 'language',
                     'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
-                     'pubdate'):
+                     'pubdate', 'rights', 'publication_type'):
            if hasattr(mi, attr):
                setattr(ans, attr, getattr(mi, attr))

@@ -243,7 +243,8 @@ class MetaInformation(object):
        for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
                  'series', 'series_index', 'rating', 'isbn', 'language',
                  'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
-                  'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'
+                  'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
+                  'rights', 'publication_type',
                  ):
            setattr(self, x, getattr(mi, x, None))

@@ -262,7 +263,8 @@ class MetaInformation(object):
                     'publisher', 'series', 'series_index', 'rating',
                     'isbn', 'application_id', 'manifest', 'spine', 'toc',
                     'cover', 'language', 'guide', 'book_producer',
-                     'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'):
+                     'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
+                     'publication_type'):
            if hasattr(mi, attr):
                val = getattr(mi, attr)
                if val is not None:
@@ -332,6 +334,8 @@ class MetaInformation(object):
            fmt('Timestamp', self.timestamp.isoformat(' '))
        if self.pubdate is not None:
            fmt('Published', self.pubdate.isoformat(' '))
+        if self.rights is not None:
+            fmt('Rights', unicode(self.rights))
        if self.lccn:
            fmt('LCCN', unicode(self.lccn))
        if self.lcc:
@@ -362,6 +366,8 @@ class MetaInformation(object):
            ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
        if self.pubdate is not None:
            ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
+        if self.rights is not None:
+            ans += [(_('Rights'), unicode(self.rights.isoformat(' ')))]
        for i, x in enumerate(ans):
            ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
        return u'<table>%s</table>'%u'\n'.join(ans)
@@ -7,6 +7,7 @@ import re
     xml:lang="en" 
     xmlns="http://www.daisy.org/z3986/2005/ncx/"
     xmlns:py="http://genshi.edgewall.org/"
+     xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
 >
    <head>
        <meta name="dtb:uid" content="${uid}"/>
@@ -23,6 +24,8 @@ import re
                ${'%*s'%(4*level,'')}<text>${re.sub(r'\s+', ' ', np.text)}</text>
            ${'%*s'%(4*level,'')}</navLabel>
            ${'%*s'%(4*level,'')}<content src="${unicode(np.href)+(('#' + unicode(np.fragment)) if np.fragment else '')}" />
+            ${'%*s'%(4*level,'')}<calibre:meta py:if="np.author" name="author">${np.author}</calibre:meta>
+            ${'%*s'%(4*level,'')}<calibre:meta py:if="np.description" name="description">${np.description}</calibre:meta>
            <py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
        ${'%*s'%(4*level,'')}</navPoint>
    </py:def>
@@ -19,6 +19,7 @@
        <meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
        <meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>
        <meta py:if="mi.timestamp is not None" name="calibre:timestamp" content="${mi.timestamp.isoformat()}"/>
+        <meta py:if="mi.publication_type is not None" name="calibre:publication_type" content="${mi.publication_type}" />
        <py:for each="tag in mi.tags">
        <dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
        </py:for>
@@ -440,10 +440,12 @@ class OPF(object):
    language        = MetadataField('language')
    comments        = MetadataField('description')
    category        = MetadataField('category')
+    rights          = MetadataField('rights')
    series          = MetadataField('series', is_dc=False)
    series_index    = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
    rating          = MetadataField('rating', is_dc=False, formatter=int)
    pubdate         = MetadataField('date', formatter=parser.parse)
+    publication_type = MetadataField('publication_type', is_dc=False)
    timestamp       = MetadataField('timestamp', is_dc=False, formatter=parser.parse)


@@ -21,7 +21,8 @@ class NCXSoup(BeautifulStoneSoup):
 class TOC(list):

    def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
-                 base_path=os.getcwd(), type='unknown'):
+                 base_path=os.getcwd(), type='unknown', author=None,
+                 description=None):
        self.href = href
        self.fragment = fragment
        if not self.fragment:
@@ -31,6 +32,8 @@ class TOC(list):
        self.base_path = base_path
        self.play_order = play_order
        self.type = type
+        self.author = author
+        self.description = description

    def __str__(self):
        lines = ['TOC: %s#%s'%(self.href, self.fragment)]
@@ -59,11 +62,13 @@ class TOC(list):
        list.remove(self, entry)
        entry.parent = None

-    def add_item(self, href, fragment, text, play_order=None, type='unknown'):
+    def add_item(self, href, fragment, text, play_order=None, type='unknown',
+            author=None, description=None):
        if play_order is None:
            play_order = (self[-1].play_order if len(self) else self.play_order) + 1
        self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
-                        base_path=self.base_path, play_order=play_order, type=type))
+                        base_path=self.base_path, play_order=play_order,
+                        type=type, author=author, description=description))
        return self[-1]

    def top_level_items(self):
@@ -6,6 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+from cStringIO import StringIO

 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.customize.conversion import OptionRecommendation
@@ -31,16 +32,80 @@ class MOBIOutput(OutputFormatPlugin):
        OptionRecommendation(name='toc_title', recommended_value=None,
            help=_('Title for any generated in-line table of contents.')
        ),
-        OptionRecommendation(name='mobi_periodical',
-            recommended_value=False, level=OptionRecommendation.LOW,
-            help=_('Generate a periodical rather than a book.')
-        ),
        OptionRecommendation(name='dont_compress',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Disable compression of the file contents.')
        ),
    ])

+    def check_for_periodical(self):
+        if self.oeb.metadata.publication_type and \
+            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
+                self.periodicalize_toc()
+                self.check_for_masthead()
+                self.opts.mobi_periodical = True
+        else:
+            self.opts.mobi_periodical = False
+
+    def check_for_masthead(self):
+        found = False
+        for typ in self.oeb.guide:
+            if type == 'masthead':
+                found = True
+                break
+        if not found:
+            self.oeb.log.debug('No masthead found, generating default one...')
+            from calibre.resources import server_resources
+            try:
+                from PIL import Image as PILImage
+                PILImage
+            except ImportError:
+                import Image as PILImage
+
+            raw = StringIO(server_resources['calibre.png'])
+            im = PILImage.open(raw)
+            of = StringIO()
+            im.save(of, 'GIF')
+            raw = of.getvalue()
+            id, href = self.oeb.manifest.generate('masthead', 'masthead')
+            self.oeb.manifest.add(id, href, 'image/gif', data=raw)
+            self.oeb.guide.add('masthead', 'Masthead Image', href)
+
+
+    def periodicalize_toc(self):
+        from calibre.ebooks.oeb.base import TOC
+        toc = self.oeb.toc
+        if toc and toc[0].klass != 'periodical':
+            self.log('Converting TOC for MOBI periodical indexing...')
+            articles = {}
+            if toc.depth < 3:
+                sections = [TOC(klass='section')]
+                for x in toc:
+                    sections[0].append(x)
+            else:
+                sections = list(toc)
+                for x in sections:
+                    x.klass = 'section'
+            for sec in sections:
+                articles[id(sec)] = []
+                for a in list(sec):
+                    a.klass = 'article'
+                    articles[id(sec)].append(a)
+                    sec.nodes.remove(a)
+            root = TOC(klass='periodical',
+                    title=unicode(self.oeb.metadata.title[0]))
+            for s in sections:
+                if articles[id(s)]:
+                    for a in articles[id(s)]:
+                        s.nodes.append(a)
+            root.nodes.append(s)
+
+            for x in list(toc.nodes):
+                toc.nodes.remove(x)
+
+            toc.nodes.append(root)
+
+
    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb
        from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
@@ -60,6 +125,7 @@ class MOBIOutput(OutputFormatPlugin):
        rasterizer(oeb, opts)
        mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
        mobimlizer(oeb, opts)
+        self.check_for_periodical()
        write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
        writer = MobiWriter(opts, imagemax=imagemax,
                compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,
@@ -47,7 +47,7 @@ XPNSMAP      = {'h'  : XHTML_NS, 'o1' : OPF1_NS,    'o2' : OPF2_NS,
                'd09': DC09_NS,  'd10': DC10_NS,    'd11': DC11_NS,
                'xsi': XSI_NS,   'dt' : DCTERMS_NS, 'ncx': NCX_NS,
                'svg': SVG_NS,   'xl' : XLINK_NS,   're': RE_NS,
-                'mbp': MBP_NS }
+                'mbp': MBP_NS, 'calibre': CALIBRE_NS }

 OPF1_NSMAP   = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
 OPF2_NSMAP   = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
@@ -455,7 +455,8 @@ class Metadata(object):
                         'description', 'format', 'identifier', 'language',
                         'publisher', 'relation', 'rights', 'source',
                         'subject', 'title', 'type'])
-    CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp'])
+    CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
+                         'publication_type'])
    OPF_ATTRS     = {'role': OPF('role'), 'file-as': OPF('file-as'),
                     'scheme': OPF('scheme'), 'event': OPF('event'),
                     'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
@@ -192,6 +192,7 @@ class OEBReader(object):
                        if not scheme and href not in known:
                            new.add(href)
            unchecked.clear()
+            warned = set([])
            for href in new:
                known.add(href)
                is_invalid = False
@@ -202,9 +203,13 @@ class OEBReader(object):
                if is_invalid:
                    continue
                if not self.oeb.container.exists(href):
-                    self.logger.warn('Referenced file %r not found' % href)
+                    if href not in warned:
+                        self.logger.warn('Referenced file %r not found' % href)
+                        warned.add(href)
                    continue
-                self.logger.warn('Referenced file %r not in manifest' % href)
+                if href not in warned:
+                    self.logger.warn('Referenced file %r not in manifest' % href)
+                    warned.add(href)
                id, _ = manifest.generate(id='added')
                guessed = guess_type(href)[0]
                media_type = guessed or BINARY_MIME
@@ -330,14 +335,14 @@ class OEBReader(object):
            po = int(child.get('playOrder', self.oeb.toc.next_play_order()))

            authorElement = xpath(child,
-                    'descendant::mbp:meta[@name = "author"]')
+                    'descendant::calibre:meta[@name = "author"]')
            if authorElement :
                author = authorElement[0].text
            else :
                author = None

            descriptionElement = xpath(child,
-                    'descendant::mbp:meta[@name = "description"]')
+                    'descendant::calibre:meta[@name = "description"]')
            if descriptionElement :
                description = descriptionElement[0].text
            else :
@@ -63,11 +63,16 @@ def meta_info_to_oeb_metadata(mi, m, log):
    if mi.timestamp is not None:
        m.clear('timestamp')
        m.add('timestamp', mi.timestamp.isoformat())
+    if mi.rights is not None:
+        m.clear('rights')
+        m.add('rights', mi.rights)
+    if mi.publication_type is not None:
+        m.clear('publication_type')
+        m.add('publication_type', mi.publication_type)
    if not m.timestamp:
        m.add('timestamp', datetime.utcnow().isoformat())


-
 class MergeMetadata(object):
    'Merge in user metadata, including cover'

@@ -13,6 +13,7 @@ from urlparse import urlparse

 from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
 from calibre.ebooks import ConversionError
+
 def XPath(x):
    try:
        return etree.XPath(x, namespaces=XPNSMAP)
@@ -64,8 +64,5 @@ class ManifestTrimmer(object):
            unchecked = new
        for item in oeb.manifest.values():
            if item not in used:
-                if getattr(self.opts, 'mobi_periodical', False) and \
-                        item.href == 'images/mastheadImage.gif':
-                    continue
                oeb.logger.info('Trimming %r from manifest' % item.href)
                oeb.manifest.remove(item)
@@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent, 'mobi_output',
                ['prefer_author_sort', 'rescale_images', 'toc_title',
-                'dont_compress', 'mobi_periodical']
+                'dont_compress',]
                )
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
@@ -41,7 +41,7 @@
     </property>
    </widget>
   </item>
-   <item row="6" column="0">
+   <item row="5" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@@ -61,13 +61,6 @@
     </property>
    </widget>
   </item>
-   <item row="5" column="0">
-    <widget class="QCheckBox" name="opt_mobi_periodical">
-     <property name="text">
-      <string>Generate a periodical rather than a book</string>
-     </property>
-    </widget>
-   </item>
   <item row="0" column="0">
    <widget class="QCheckBox" name="opt_no_inline_toc">
     <property name="text">
@@ -16,6 +16,7 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, QString, \

 from calibre import strftime
 from calibre.ptempfile import PersistentTemporaryFile
+from calibre.utils.pyparsing import ParseException
 from calibre.library.database2 import FIELD_MAP
 from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
                         error_dialog
@@ -267,7 +268,11 @@ class BooksModel(QAbstractTableModel):
            self.count_changed()

    def search(self, text, refinement, reset=True):
-        self.db.search(text)
+        try:
+            self.db.search(text)
+        except ParseException:
+            self.emit(SIGNAL('parse_exception()'))
+            return
        self.last_search = text
        if reset:
            self.clear_caches()
@@ -898,7 +903,12 @@ class DeviceBooksModel(BooksModel):
        if not text or not text.strip():
            self.map = list(range(len(self.db)))
        else:
-            matches = self.search_engine.parse(text)
+            try:
+                matches = self.search_engine.parse(text)
+            except ParseException:
+                self.emit(SIGNAL('parse_exception()'))
+                return
+
            self.map = []
            for i in range(len(self.db)):
                if i in matches:
@@ -221,9 +221,20 @@ class LibraryServer(object):

    def get_format(self, id, format):
        format = format.upper()
-        fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
+        fmt = self.db.format(id, format, index_is_id=True, as_file=True,
+                mode='r+b')
        if fmt is None:
            raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
+        if format == 'EPUB':
+            from tempfile import TemporaryFile
+            from calibre.ebooks.metadata.meta import set_metadata
+            raw = fmt.read()
+            fmt = TemporaryFile()
+            fmt.write(raw)
+            fmt.seek(0)
+            set_metadata(fmt, self.db.get_metadata(id, index_is_id=True),
+                    'epub')
+            fmt.seek(0)
        mt = guess_type('dummy.'+format.lower())[0]
        if mt is None:
            mt = 'application/octet-stream'
@@ -64,6 +64,10 @@ class BasicNewsRecipe(Recipe):
    #: Delay between consecutive downloads in seconds
    delay                  = 0

+    #: Publication type
+    #: Set to newspaper, magazine or blog
+    publication_type = 'unknown'
+
    #: Number of simultaneous downloads. Set to 1 if the server is picky.
    #: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0
    simultaneous_downloads = 5
@@ -848,6 +852,7 @@ class BasicNewsRecipe(Recipe):
        mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
+        mi.publication_type = 'periodical:'+self.publication_type
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')
        opf = OPFCreator(dir, mi)
@@ -878,13 +883,16 @@ class BasicNewsRecipe(Recipe):
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
+                    desc = a.text_summary
+                    if not desc:
+                        desc = None
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
-                                    play_order=po)
+                                    play_order=po, description=desc)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
@@ -915,7 +923,11 @@ class BasicNewsRecipe(Recipe):
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
-                feed_index(i, toc.add_item('feed_%d/index.html'%i, None, f.title, play_order=po))
+                desc = f.description
+                if not desc:
+                    desc = None
+                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+                    f.title, play_order=po, description=desc))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)
@@ -7,9 +7,9 @@ clarin.com
 '''

 from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag

-from calibre.web.feeds.news import BasicNewsRecipe
-
 class Clarin(BasicNewsRecipe):
    title                 = 'Clarin'
    __author__            = 'Darko Miletic'
@@ -22,14 +22,19 @@ class Clarin(BasicNewsRecipe):
    no_stylesheets        = True
    cover_url             = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
    remove_javascript     = True
-    
+    encoding              = 'cp1252'
+    language              = _('Spanish')
+    lang                  = 'es-AR'
+    direction             = 'ltr'
+    extra_css             = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
+
    html2lrf_options = [
-                          '--comment', description
+                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    remove_tags = [
                     dict(name='a'   , attrs={'class':'Imp'   })
@@ -48,17 +53,20 @@ class Clarin(BasicNewsRecipe):
              ,(u'Deportes'      , u'http://www.clarin.com/diario/hoy/deportes.xml'     )
            ]

-    def get_article_url(self, article):
-        artl  = article.get('link',  None)
-        rest  = artl.partition('-0')[-1]
+    def print_version(self, url):
+        rest  = url.partition('-0')[-1]
        lmain = rest.partition('.')[0]
-        return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
+        lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
+        return lurl

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
-        soup.head.insert(0,mtag)    
+        soup.html['lang'] = self.lang
+        soup.html['dir' ] = self.direction
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

-    language = _('Spanish')
@@ -7,94 +7,108 @@ from calibre.web.feeds.news import BasicNewsRecipe

 # http://online.wsj.com/page/us_in_todays_paper.html

-class WallStreetJournal(BasicNewsRecipe): 
-    
-        title = 'The Wall Street Journal' 
-        __author__ = 'Kovid Goyal'
+class WallStreetJournal(BasicNewsRecipe):
+
+        title = 'The Wall Street Journal'
+        __author__ = 'Kovid Goyal and Sujata Raman'
        description = 'News and current affairs.'
        needs_subscription = True
        language = _('English')
        max_articles_per_feed = 10
-        timefmt  = ' [%a, %b %d, %Y]' 
+        timefmt  = ' [%a, %b %d, %Y]'
        no_stylesheets = True
+
+        extra_css      = '''h1{color:#093D72 ; font-size:large ; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; }
+                        h2{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
+                        .subhead{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
+                        .insettipUnit {color:#666666; font-family:Arial,Sans-serif;font-size:xx-small }
+                        .targetCaption{ font-size:x-small; color:#333333; font-family:Arial,Helvetica,sans-serif}
+                        .article{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
+                        .tagline {color:#333333; font-size:xx-small}
+                        .dateStamp {color:#666666; font-family:Arial,Helvetica,sans-serif}
+                         h3{color:blue ;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
+                         .byline{color:blue;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
+                         h6{color:#333333; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic; }
+                        .paperLocation{color:#666666; font-size:xx-small}'''
+
        remove_tags_before = dict(name='h1')
        remove_tags = [
-                       dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive"]),
-                       {'class':['more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
+                       dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive","articleTabs_tab_video","articleTabs_tab_map","articleTabs_tab_slideshow"]),
+                       {'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map','insettip','more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
                       dict(rel='shortcut icon'),
                      ]
        remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]

-        
-        def get_browser(self): 
-            br = BasicNewsRecipe.get_browser() 
-            if self.username is not None and self.password is not None: 
-                br.open('http://commerce.wsj.com/auth/login') 
-                br.select_form(nr=0) 
-                br['user']   = self.username 
-                br['password'] = self.password 
-                br.submit() 
+
+        def get_browser(self):
+            br = BasicNewsRecipe.get_browser()
+            if self.username is not None and self.password is not None:
+                br.open('http://commerce.wsj.com/auth/login')
+                br.select_form(nr=0)
+                br['user']   = self.username
+                br['password'] = self.password
+                br.submit()
            return br
-        
+
        def postprocess_html(self, soup, first):
            for tag in soup.findAll(name=['table', 'tr', 'td']):
                tag.name = 'div'
            return soup
-        
+
        def get_article_url(self, article):
            try:
                return article.feedburner_origlink.split('?')[0]
            except AttributeError:
                return article.link.split('?')[0]
- 
-        def cleanup(self): 
-            self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com') 
- 
-        feeds =  [ 
-                #('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'), 
-                #('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'), 
-                #('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'), 
-                (' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'), 
-                (' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'), 
-                # ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'), 
-                ('Today\'s Newspaper -  Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'), 
-                ('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'), 
-                ('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'), 
-                ('Today\'s Newspaper - Personal Journal', 'http://online.wsj.com/xml/rss/3_7208.xml'), 
-                ('Today\'s Newspaper - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7209.xml'), 
-                ('Opinion', 'http://online.wsj.com/xml/rss/3_7041.xml'), 
-                ('News - U.S.: What\'s News', 'http://online.wsj.com/xml/rss/3_7011.xml'), 
-                ('News - U.S. Business', 'http://online.wsj.com/xml/rss/3_7014.xml'), 
-                ('News - Europe: What\'s News', 'http://online.wsj.com/xml/rss/3_7012.xml'), 
-                ('News - Asia: What\'s News', 'http://online.wsj.com/xml/rss/3_7013.xml'), 
-                ('News - World News', 'http://online.wsj.com/xml/rss/3_7085.xml'), 
-                ('News - Economy', 'http://online.wsj.com/xml/rss/3_7086.xml'), 
-                ('News - Earnings', 'http://online.wsj.com/xml/rss/3_7088.xml'), 
-                ('News - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'), 
-                ('News - Law', 'http://online.wsj.com/xml/rss/3_7091.xml'), 
-                ('News - Media & Marketing', 'http://online.wsj.com/xml/rss/3_7020.xml'), 
-                ('Technology - What\'s News', 'http://online.wsj.com/xml/rss/3_7015.xml'), 
-                ('Technology - Gadgets', 'http://online.wsj.com/xml/rss/3_7094.xml'), 
-                ('Technology - Telecommunications', 'http://online.wsj.com/xml/rss/3_7095.xml'), 
-                ('Technology - E-commerce/Media', 'http://online.wsj.com/xml/rss/3_7096.xml'), 
-                ('Technology - Asia', 'http://online.wsj.com/xml/rss/3_7097.xml'), 
-                ('Technology - Europe', 'http://online.wsj.com/xml/rss/3_7098.xml'), 
-                ('Markets - News', 'http://online.wsj.com/xml/rss/3_7031.xml'), 
-                ('Markets - Europe News', 'http://online.wsj.com/xml/rss/3_7101.xml'), 
-                ('Markets - Asia News', 'http://online.wsj.com/xml/rss/3_7102.xml'), 
-                ('Markets - Deals & Deal Makers', 'http://online.wsj.com/xml/rss/3_7099.xml'), 
-                ('Markets - Hedge Funds', 'http://online.wsj.com/xml/rss/3_7199.xml'), 
-                ('Personal Journal', 'http://online.wsj.com/xml/rss/3_7200.xml'), 
-                ('Personal Journal - Money', 'http://online.wsj.com/xml/rss/3_7104.xml'), 
-                ('Personal Journal - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'), 
-                ('Personal Journal - Autos', 'http://online.wsj.com/xml/rss/3_7092.xml'), 
-                ('Personal Journal - Homes', 'http://online.wsj.com/xml/rss/3_7105.xml'), 
-                ('Personal Journal - Travel', 'http://online.wsj.com/xml/rss/3_7106.xml'), 
-                ('Personal Journal - Careers', 'http://online.wsj.com/xml/rss/3_7107.xml'), 
-                ('Weekend & Leisure', 'http://online.wsj.com/xml/rss/3_7201.xml'), 
-                ('Weekend & Leisure - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7202.xml'), 
-                ('Weekend & Leisure - Arts & Entertainment', 'http://online.wsj.com/xml/rss/3_7177.xml'), 
-                ('Weekend & Leisure - Books', 'http://online.wsj.com/xml/rss/3_7203.xml'), 
-                ('Weekend & Leisure - Sports', 'http://online.wsj.com/xml/rss/3_7204.xml'), 
+
+        def cleanup(self):
+            self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
+
+        feeds =  [
+                #('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'),
+                #('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'),
+                #('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
+                (' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'),
+                (' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'),
+                # ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'),
+                ('Today\'s Newspaper -  Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'),
+                ('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'),
+                ('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),
+                ('Today\'s Newspaper - Personal Journal', 'http://online.wsj.com/xml/rss/3_7208.xml'),
+                ('Today\'s Newspaper - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7209.xml'),
+                ('Opinion', 'http://online.wsj.com/xml/rss/3_7041.xml'),
+                ('News - U.S.: What\'s News', 'http://online.wsj.com/xml/rss/3_7011.xml'),
+                ('News - U.S. Business', 'http://online.wsj.com/xml/rss/3_7014.xml'),
+                ('News - Europe: What\'s News', 'http://online.wsj.com/xml/rss/3_7012.xml'),
+                ('News - Asia: What\'s News', 'http://online.wsj.com/xml/rss/3_7013.xml'),
+                ('News - World News', 'http://online.wsj.com/xml/rss/3_7085.xml'),
+                ('News - Economy', 'http://online.wsj.com/xml/rss/3_7086.xml'),
+                ('News - Earnings', 'http://online.wsj.com/xml/rss/3_7088.xml'),
+                ('News - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
+                ('News - Law', 'http://online.wsj.com/xml/rss/3_7091.xml'),
+                ('News - Media & Marketing', 'http://online.wsj.com/xml/rss/3_7020.xml'),
+                ('Technology - What\'s News', 'http://online.wsj.com/xml/rss/3_7015.xml'),
+                ('Technology - Gadgets', 'http://online.wsj.com/xml/rss/3_7094.xml'),
+                ('Technology - Telecommunications', 'http://online.wsj.com/xml/rss/3_7095.xml'),
+                ('Technology - E-commerce/Media', 'http://online.wsj.com/xml/rss/3_7096.xml'),
+                ('Technology - Asia', 'http://online.wsj.com/xml/rss/3_7097.xml'),
+                ('Technology - Europe', 'http://online.wsj.com/xml/rss/3_7098.xml'),
+                ('Markets - News', 'http://online.wsj.com/xml/rss/3_7031.xml'),
+                ('Markets - Europe News', 'http://online.wsj.com/xml/rss/3_7101.xml'),
+                ('Markets - Asia News', 'http://online.wsj.com/xml/rss/3_7102.xml'),
+                ('Markets - Deals & Deal Makers', 'http://online.wsj.com/xml/rss/3_7099.xml'),
+                ('Markets - Hedge Funds', 'http://online.wsj.com/xml/rss/3_7199.xml'),
+                ('Personal Journal', 'http://online.wsj.com/xml/rss/3_7200.xml'),
+                ('Personal Journal - Money', 'http://online.wsj.com/xml/rss/3_7104.xml'),
+                ('Personal Journal - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
+                ('Personal Journal - Autos', 'http://online.wsj.com/xml/rss/3_7092.xml'),
+                ('Personal Journal - Homes', 'http://online.wsj.com/xml/rss/3_7105.xml'),
+                ('Personal Journal - Travel', 'http://online.wsj.com/xml/rss/3_7106.xml'),
+                ('Personal Journal - Careers', 'http://online.wsj.com/xml/rss/3_7107.xml'),
+                ('Weekend & Leisure', 'http://online.wsj.com/xml/rss/3_7201.xml'),
+                ('Weekend & Leisure - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7202.xml'),
+                ('Weekend & Leisure - Arts & Entertainment', 'http://online.wsj.com/xml/rss/3_7177.xml'),
+                ('Weekend & Leisure - Books', 'http://online.wsj.com/xml/rss/3_7203.xml'),
+                ('Weekend & Leisure - Sports', 'http://online.wsj.com/xml/rss/3_7204.xml'),
                ]