Sync to trunk.

2025-07-09 03:04:10 -04:00 · 2009-07-30 19:18:33 -04:00 · 2009-07-30 19:18:33 -04:00 · f10852a43c
commit f10852a43c
parent 6b5d4d3548 fab11a14cf
6 changed files with 117 additions and 28 deletions
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -36,6 +36,7 @@ mimetypes.add_type('application/ereader',                 '.pdb')
 mimetypes.add_type('application/mobi',                    '.mobi')
 mimetypes.add_type('application/mobi',                    '.prc')
 mimetypes.add_type('application/mobi',                    '.azw')
 mimetypes.add_type('image/wmf',                           '.wmf')
 guess_type = mimetypes.guess_type
 import cssutils
 cssutils.log.setLevel(logging.WARN)
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -2,7 +2,7 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os
+import os, glob, re
 from lxml import etree
@ -61,6 +61,39 @@ class RTFInput(InputFormatPlugin):
        os.remove('out.xml')
        return ans
    def extract_images(self, picts):
        """Extract the hex-encoded images stored in ``{\\pict ...}`` groups.

        ``picts`` is the path of the file to scan; it is read as bytes.
        Every ``{\\pict ...}`` group found is hex-decoded and written to
        the current working directory as ``NNNN<ext>``, where ``NNNN`` is
        the 1-based, zero-padded position of the group in the file.

        Returns a dict mapping that 1-based position (int) to the name of
        the file that was written.

        Raises ValueError when the braces of a group never balance.
        Whatever ``binascii.unhexlify`` raises for a payload that is not
        valid hex is propagated unchanged.
        """
        # Local import: works on byte strings on both Python 2 and 3,
        # unlike str.decode('hex').
        from binascii import unhexlify

        self.log('Extracting images...')
        with open(picts, 'rb') as f:
            raw = f.read()

        # Offset just past each "{\pict" opener.
        starts = [m.start(1)
                  for m in re.finditer(br'\{\\pict([^}]+)\}', raw)]
        brace = re.compile(br'[{}]')
        not_alnum = re.compile(br'[^a-zA-Z0-9]')

        imap = {}
        count = 0
        for start in starts:
            # Walk the braces from the opener until the group that
            # contains it is closed again (groups may nest).
            depth, end = 1, None
            for m in brace.finditer(raw, start):
                if m.group() == b'{':
                    depth += 1
                else:
                    depth -= 1
                if depth == 0:
                    end = m.start()
                    break
            if end is None:
                # Never scan past the end of the buffer.
                raise ValueError('Unbalanced braces in \\pict group')

            # Keep only the alphanumeric payload; the slice stops at the
            # closing brace so nothing after the group leaks in.
            enc = not_alnum.sub(b'', raw[start:end])
            if len(enc) % 2 == 1:
                # Hex needs an even number of digits; drop the stray one.
                enc = enc[:-1]
            data = unhexlify(enc)

            # Guess the file type from markers near the start of the data;
            # anything unrecognised is saved as JPEG.
            header = data[:200]
            if b'EMF' in header:
                ext = '.wmf'
            elif b'PNG' in header:
                ext = '.png'
            else:
                ext = '.jpg'

            count += 1
            name = '%04d%s' % (count, ext)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
        return imap
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.rtf.xsl import xhtml
@ -74,9 +107,22 @@ class RTFInput(InputFormatPlugin):
        except RtfInvalidCodeException:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.'))
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            imap = {}
            try:
                imap = self.extract_images(d[0])
            except:
                self.log.exception('Failed to extract images...')
        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)
        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)
--- a/src/calibre/ebooks/rtf/xsl.py
+++ b/src/calibre/ebooks/rtf/xsl.py
@ -18,11 +18,11 @@
 xhtml = '''\
 <?xml version="1.0"?>
-<xsl:stylesheet version="1.0" 
+<xsl:stylesheet version="1.0"
-    xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:html="http://www.w3.org/1999/xhtml"
    xmlns:rtf="http://rtf2xml.sourceforge.net/"
-    exclude-result-prefixes="rtf"    
+    exclude-result-prefixes="rtf"
 >
    <xsl:template match = "rtf:para">
@ -36,7 +36,7 @@ xhtml = '''\
                              parent::rtf:paragraph-definition[@name='heading 7']|
                              parent::rtf:paragraph-definition[@name='heading 8']|
                              parent::rtf:paragraph-definition[@name='heading 9']
-            
+
            ">
                <xsl:variable name="head-number" select="substring(parent::rtf:paragraph-definition/@name, 9)"/>
                <xsl:element name="h{$head-number}">
@ -64,7 +64,7 @@ xhtml = '''\
                              parent::rtf:paragraph-definition[@name='heading 7']|
                              parent::rtf:paragraph-definition[@name='heading 8']|
                              parent::rtf:paragraph-definition[@name='heading 9']
-            
+
            ">
                <xsl:apply-templates/>
            </xsl:when>
@ -108,17 +108,17 @@ xhtml = '''\
            <xsl:when test = "@italics = 'true' ">
               <emph rend = "paragraph-emph-italics">
                    <xsl:apply-templates/>
-               </emph> 
+               </emph>
            </xsl:when>
            <xsl:when test = "@bold = 'true' ">
               <emph rend = "paragraph-emph-bold">
                    <xsl:apply-templates/>
-               </emph> 
+               </emph>
            </xsl:when>
            <xsl:when test = "@underlined">
               <emph rend = "paragraph-emph-underlined">
                    <xsl:apply-templates/>
-               </emph> 
+               </emph>
            </xsl:when>
            <xsl:when test = "(@strike-through = 'true')
                or (@double-strike-through = 'true')
@ -128,18 +128,18 @@ xhtml = '''\
                or (@shadow = 'true')
                or (@hidden = 'true')
                or (@outline = 'true')
-            
+
                ">
               <emph rend = "paragraph-emph">
                    <xsl:apply-templates/>
-               </emph> 
+               </emph>
            </xsl:when>
            <xsl:otherwise>
                <xsl:apply-templates/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
-    
+
    <xsl:template name="make-header">
        <head>
            <xsl:element name="meta">
@ -150,7 +150,7 @@ xhtml = '''\
                    <xsl:text>http://rtf2xml.sourceforge.net/</xsl:text>
                </xsl:attribute>
            </xsl:element>
-            
+
            <xsl:choose>
                <xsl:when test="/rtf:doc/rtf:preamble/rtf:doc-information">
                    <xsl:apply-templates select="/rtf:doc/rtf:preamble/rtf:doc-information" mode="header"/>
@ -333,7 +333,7 @@ xhtml = '''\
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
-    
+
    <xsl:template match="rtf:inline">
        <xsl:variable name="num-attrs" select="count(@*)"/>
        <xsl:choose>
@ -387,7 +387,7 @@ xhtml = '''\
                    </xsl:attribute>
                    <xsl:apply-templates/>
                </xsl:element>
-                
+
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
@ -401,9 +401,9 @@ xhtml = '''\
            </xsl:attribute>
            <xsl:apply-templates/>
        </xsl:element>
-        
+
    </xsl:template>
-    
+
       <xsl:template match="rtf:list[@list-type='unordered']">
       <xsl:element name="ul">
           <xsl:apply-templates/>
@ -479,13 +479,13 @@ xhtml = '''\
    <xsl:template match="rtf:preamble">
        <xsl:apply-templates/>
    </xsl:template>
-    
+
    <xsl:template match="rtf:page-break">
        <xsl:element name="br">
            <xsl:attribute name="style">page-break-after:always</xsl:attribute>
        </xsl:element>
    </xsl:template>
-    
+
    <xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
    <xsl:template match="rtf:body">
@ -505,11 +505,11 @@ xhtml = '''\
            <xsl:apply-templates/>
        </xsl:element>
    </xsl:template>
-    
+
    <xsl:template match = "rtf:field-block">
      <xsl:apply-templates/>
    </xsl:template>
-    
+
    <xsl:template match = "rtf:field[@type='hyperlink']">
        <xsl:element name ="a">
            <xsl:attribute name = "href">
@ -522,9 +522,13 @@ xhtml = '''\
    <xsl:template match = "rtf:field">
        <xsl:apply-templates/>
    </xsl:template>
-    
+
-    <xsl:template match="rtf:pict" />
+    <xsl:template match="rtf:pict">
-    
+        <xsl:element name="img">
            <xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
        </xsl:element>
    </xsl:template>
    <xsl:template match="*">
        <xsl:message>
            <xsl:text>no match for element: "</xsl:text>
@ -533,6 +537,6 @@ xhtml = '''\
        </xsl:message>
        <xsl:apply-templates/>
    </xsl:template>
-    
+
 </xsl:stylesheet>
-'''
+'''
--- a/src/calibre/ebooks/rtf2xml/pict.py
+++ b/src/calibre/ebooks/rtf2xml/pict.py
@ -55,7 +55,7 @@ class Pict:
        return "}\n"
    def __text_func(self, line):
        #tx<nu<__________<true text
-        return line[18:]
+        return line[17:]
    def __make_dir(self):
        """ Make a dirctory to put the image data in"""
        base_name = os.path.basename(getattr(self.__orig_file, 'name',
--- a/src/calibre/gui2/wizard/send_email.py
+++ b/src/calibre/gui2/wizard/send_email.py
@ -112,7 +112,8 @@ class SendEmail(QWidget, Ui_Form):
        self.relay_tls.setChecked(True)
        info_dialog(self, _('Finish gmail setup'),
-            _('Dont forget to enter your gmail username and password')).exec_()
+            _('Dont forget to enter your gmail username and password. '
                'You can sign up for a free gmail account at http://gmail.com')).exec_()
        self.relay_username.setFocus(Qt.OtherFocusReason)
        self.relay_username.setCursorPosition(0)
--- a/src/calibre/web/feeds/recipes/recipe_newsweek.py
+++ b/src/calibre/web/feeds/recipes/recipe_newsweek.py
@ -4,6 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import re
 from calibre import strftime
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.news import BasicNewsRecipe
 class Newsweek(BasicNewsRecipe):
@ -128,3 +129,39 @@ class Newsweek(BasicNewsRecipe):
        return cover_url
    def postprocess_book(self, oeb, opts, log):
        """Fill in missing author and description for every TOC article.

        Walks ``oeb.toc`` (sections containing articles). For each article
        whose ``author`` or ``description`` is None, the item registered
        under the article's href in ``oeb.manifest.hrefs`` is parsed with
        BeautifulSoup and the missing value is derived from it.

        ``opts`` and ``log`` are accepted but not used here.
        """
        def parse(href):
            # Parse the string form of the manifest entry for this href.
            return BeautifulSoup(str(oeb.manifest.hrefs[href]))

        def class_text(soup, css_class):
            # Text of the first tag carrying css_class, '' when absent.
            tag = soup.find(True, attrs={'class': css_class})
            return self.tag_to_string(tag) if tag is not None else ''

        def extract_byline(href):
            # "<author info> | <issue date>", or whichever part exists.
            soup = parse(href)
            byline = class_text(soup, 'authorInfo')
            issue_date = re.sub(',', '', class_text(soup, 'issueDate'))
            if byline and issue_date:
                return byline + ' | ' + issue_date
            return byline + issue_date

        def extract_description(href):
            # Prefer the content of the tag named "description", unless it
            # is the generic site-wide blurb; otherwise fall back to the
            # text of the first <p> in the document.
            soup = parse(href)
            meta = soup.find(True, attrs={'name': 'description'})
            if meta is not None and meta.has_key('content'):
                content = meta['content']
                if not content.startswith('Newsweek magazine online plus'):
                    return content
            return self.tag_to_string(soup.find('p'))

        for section in oeb.toc:
            for article in section:
                if article.author is None:
                    article.author = extract_byline(article.href)
                if article.description is None:
                    article.description = extract_description(article.href)