mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
f10852a43c
@ -36,6 +36,7 @@ mimetypes.add_type('application/ereader', '.pdb')
|
|||||||
mimetypes.add_type('application/mobi', '.mobi')
|
mimetypes.add_type('application/mobi', '.mobi')
|
||||||
mimetypes.add_type('application/mobi', '.prc')
|
mimetypes.add_type('application/mobi', '.prc')
|
||||||
mimetypes.add_type('application/mobi', '.azw')
|
mimetypes.add_type('application/mobi', '.azw')
|
||||||
|
mimetypes.add_type('image/wmf', '.wmf')
|
||||||
guess_type = mimetypes.guess_type
|
guess_type = mimetypes.guess_type
|
||||||
import cssutils
|
import cssutils
|
||||||
cssutils.log.setLevel(logging.WARN)
|
cssutils.log.setLevel(logging.WARN)
|
||||||
|
@ -2,7 +2,7 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import os
|
import os, glob, re
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -61,6 +61,39 @@ class RTFInput(InputFormatPlugin):
|
|||||||
os.remove('out.xml')
|
os.remove('out.xml')
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def extract_images(self, picts):
    """
    Extract the hex-encoded pictures stored in an RTF picture dump.

    Every ``{\\pict ...}`` group found in the file at ``picts`` is decoded
    from hexadecimal text to binary and written to a file in the current
    working directory. Files are named after the 1-based position of the
    group in the dump (``0001.jpg``, ``0002.png``, ...).

    :param picts: Path to the file containing the ``{\\pict ...}`` groups.
    :return: A dict mapping the 1-based picture index to the name of the
             file that was written for it.

    Any alphanumeric payload that is not valid hexadecimal makes the
    decoding step raise; the error is left to propagate to the caller.
    """
    # Imported locally so this method does not depend on a module level
    # import being present.
    import binascii

    self.log('Extracting images...')

    with open(picts, 'rb') as f:
        raw = f.read()

    # Offset of the first byte after each ``{\pict`` opener.
    starts = [m.start(1) for m in re.finditer(br'\{\\pict([^}]+)\}', raw)]

    imap = {}
    end = len(raw)

    for count, start in enumerate(starts, 1):
        # Walk forward to the brace that closes this group, honouring nested
        # groups. The scan is bounded by the buffer length so that an
        # unbalanced group cannot run off the end of the data.
        pos, depth = start, 1
        while depth > 0 and pos < end:
            ch = raw[pos:pos + 1]  # slicing gives bytes on Python 2 and 3
            if ch == b'}':
                depth -= 1
            elif ch == b'{':
                depth += 1
            pos += 1

        # When the group is balanced ``pos`` is one past its closing brace;
        # stop before that brace so that nothing following the group can
        # leak into the picture data.
        stop = pos - 1 if depth == 0 else pos

        enc = re.sub(br'[^a-zA-Z0-9]', b'', raw[start:stop])
        if len(enc) % 2 == 1:
            # A hex string needs an even number of digits, drop the stray one
            enc = enc[:-1]
        data = binascii.unhexlify(enc)

        # Guess the image type from markers near the start of the data,
        # defaulting to JPEG when none is recognised.
        head = data[:200]
        if b'EMF' in head:
            ext = '.wmf'
        elif b'PNG' in head:
            ext = '.png'
        else:
            ext = '.jpg'

        name = '%04d%s' % (count, ext)
        with open(name, 'wb') as out:
            out.write(data)
        imap[count] = name

    return imap
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
from calibre.ebooks.rtf.xsl import xhtml
|
from calibre.ebooks.rtf.xsl import xhtml
|
||||||
@ -74,9 +107,22 @@ class RTFInput(InputFormatPlugin):
|
|||||||
except RtfInvalidCodeException:
|
except RtfInvalidCodeException:
|
||||||
raise ValueError(_('This RTF file has a feature calibre does not '
|
raise ValueError(_('This RTF file has a feature calibre does not '
|
||||||
'support. Convert it to HTML first and then try it.'))
|
'support. Convert it to HTML first and then try it.'))
|
||||||
|
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
|
||||||
|
if d:
|
||||||
|
imap = {}
|
||||||
|
try:
|
||||||
|
imap = self.extract_images(d[0])
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to extract images...')
|
||||||
self.log('Parsing XML...')
|
self.log('Parsing XML...')
|
||||||
parser = etree.XMLParser(recover=True, no_network=True)
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
doc = etree.fromstring(xml, parser=parser)
|
doc = etree.fromstring(xml, parser=parser)
|
||||||
|
for pict in doc.xpath('//rtf:pict[@num]',
|
||||||
|
namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
|
||||||
|
num = int(pict.get('num'))
|
||||||
|
name = imap.get(num, None)
|
||||||
|
if name is not None:
|
||||||
|
pict.set('num', name)
|
||||||
self.log('Converting XML to HTML...')
|
self.log('Converting XML to HTML...')
|
||||||
styledoc = etree.fromstring(xhtml)
|
styledoc = etree.fromstring(xhtml)
|
||||||
|
|
||||||
|
@ -18,11 +18,11 @@
|
|||||||
|
|
||||||
xhtml = '''\
|
xhtml = '''\
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
<xsl:stylesheet version="1.0"
|
<xsl:stylesheet version="1.0"
|
||||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
xmlns:html="http://www.w3.org/1999/xhtml"
|
xmlns:html="http://www.w3.org/1999/xhtml"
|
||||||
xmlns:rtf="http://rtf2xml.sourceforge.net/"
|
xmlns:rtf="http://rtf2xml.sourceforge.net/"
|
||||||
exclude-result-prefixes="rtf"
|
exclude-result-prefixes="rtf"
|
||||||
>
|
>
|
||||||
|
|
||||||
<xsl:template match = "rtf:para">
|
<xsl:template match = "rtf:para">
|
||||||
@ -36,7 +36,7 @@ xhtml = '''\
|
|||||||
parent::rtf:paragraph-definition[@name='heading 7']|
|
parent::rtf:paragraph-definition[@name='heading 7']|
|
||||||
parent::rtf:paragraph-definition[@name='heading 8']|
|
parent::rtf:paragraph-definition[@name='heading 8']|
|
||||||
parent::rtf:paragraph-definition[@name='heading 9']
|
parent::rtf:paragraph-definition[@name='heading 9']
|
||||||
|
|
||||||
">
|
">
|
||||||
<xsl:variable name="head-number" select="substring(parent::rtf:paragraph-definition/@name, 9)"/>
|
<xsl:variable name="head-number" select="substring(parent::rtf:paragraph-definition/@name, 9)"/>
|
||||||
<xsl:element name="h{$head-number}">
|
<xsl:element name="h{$head-number}">
|
||||||
@ -64,7 +64,7 @@ xhtml = '''\
|
|||||||
parent::rtf:paragraph-definition[@name='heading 7']|
|
parent::rtf:paragraph-definition[@name='heading 7']|
|
||||||
parent::rtf:paragraph-definition[@name='heading 8']|
|
parent::rtf:paragraph-definition[@name='heading 8']|
|
||||||
parent::rtf:paragraph-definition[@name='heading 9']
|
parent::rtf:paragraph-definition[@name='heading 9']
|
||||||
|
|
||||||
">
|
">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
@ -108,17 +108,17 @@ xhtml = '''\
|
|||||||
<xsl:when test = "@italics = 'true' ">
|
<xsl:when test = "@italics = 'true' ">
|
||||||
<emph rend = "paragraph-emph-italics">
|
<emph rend = "paragraph-emph-italics">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</emph>
|
</emph>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
<xsl:when test = "@bold = 'true' ">
|
<xsl:when test = "@bold = 'true' ">
|
||||||
<emph rend = "paragraph-emph-bold">
|
<emph rend = "paragraph-emph-bold">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</emph>
|
</emph>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
<xsl:when test = "@underlined">
|
<xsl:when test = "@underlined">
|
||||||
<emph rend = "paragraph-emph-underlined">
|
<emph rend = "paragraph-emph-underlined">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</emph>
|
</emph>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
<xsl:when test = "(@strike-through = 'true')
|
<xsl:when test = "(@strike-through = 'true')
|
||||||
or (@double-strike-through = 'true')
|
or (@double-strike-through = 'true')
|
||||||
@ -128,18 +128,18 @@ xhtml = '''\
|
|||||||
or (@shadow = 'true')
|
or (@shadow = 'true')
|
||||||
or (@hidden = 'true')
|
or (@hidden = 'true')
|
||||||
or (@outline = 'true')
|
or (@outline = 'true')
|
||||||
|
|
||||||
">
|
">
|
||||||
<emph rend = "paragraph-emph">
|
<emph rend = "paragraph-emph">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</emph>
|
</emph>
|
||||||
</xsl:when>
|
</xsl:when>
|
||||||
<xsl:otherwise>
|
<xsl:otherwise>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:otherwise>
|
</xsl:otherwise>
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template name="make-header">
|
<xsl:template name="make-header">
|
||||||
<head>
|
<head>
|
||||||
<xsl:element name="meta">
|
<xsl:element name="meta">
|
||||||
@ -150,7 +150,7 @@ xhtml = '''\
|
|||||||
<xsl:text>http://rtf2xml.sourceforge.net/</xsl:text>
|
<xsl:text>http://rtf2xml.sourceforge.net/</xsl:text>
|
||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
|
|
||||||
<xsl:choose>
|
<xsl:choose>
|
||||||
<xsl:when test="/rtf:doc/rtf:preamble/rtf:doc-information">
|
<xsl:when test="/rtf:doc/rtf:preamble/rtf:doc-information">
|
||||||
<xsl:apply-templates select="/rtf:doc/rtf:preamble/rtf:doc-information" mode="header"/>
|
<xsl:apply-templates select="/rtf:doc/rtf:preamble/rtf:doc-information" mode="header"/>
|
||||||
@ -333,7 +333,7 @@ xhtml = '''\
|
|||||||
</xsl:otherwise>
|
</xsl:otherwise>
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="rtf:inline">
|
<xsl:template match="rtf:inline">
|
||||||
<xsl:variable name="num-attrs" select="count(@*)"/>
|
<xsl:variable name="num-attrs" select="count(@*)"/>
|
||||||
<xsl:choose>
|
<xsl:choose>
|
||||||
@ -387,7 +387,7 @@ xhtml = '''\
|
|||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
|
|
||||||
</xsl:otherwise>
|
</xsl:otherwise>
|
||||||
</xsl:choose>
|
</xsl:choose>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
@ -401,9 +401,9 @@ xhtml = '''\
|
|||||||
</xsl:attribute>
|
</xsl:attribute>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="rtf:list[@list-type='unordered']">
|
<xsl:template match="rtf:list[@list-type='unordered']">
|
||||||
<xsl:element name="ul">
|
<xsl:element name="ul">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
@ -479,13 +479,13 @@ xhtml = '''\
|
|||||||
<xsl:template match="rtf:preamble">
|
<xsl:template match="rtf:preamble">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="rtf:page-break">
|
<xsl:template match="rtf:page-break">
|
||||||
<xsl:element name="br">
|
<xsl:element name="br">
|
||||||
<xsl:attribute name="style">page-break-after:always</xsl:attribute>
|
<xsl:attribute name="style">page-break-after:always</xsl:attribute>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
|
<xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
|
||||||
|
|
||||||
<xsl:template match="rtf:body">
|
<xsl:template match="rtf:body">
|
||||||
@ -505,11 +505,11 @@ xhtml = '''\
|
|||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match = "rtf:field-block">
|
<xsl:template match = "rtf:field-block">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match = "rtf:field[@type='hyperlink']">
|
<xsl:template match = "rtf:field[@type='hyperlink']">
|
||||||
<xsl:element name ="a">
|
<xsl:element name ="a">
|
||||||
<xsl:attribute name = "href">
|
<xsl:attribute name = "href">
|
||||||
@ -522,9 +522,13 @@ xhtml = '''\
|
|||||||
<xsl:template match = "rtf:field">
|
<xsl:template match = "rtf:field">
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="rtf:pict" />
|
<xsl:template match="rtf:pict">
|
||||||
|
<xsl:element name="img">
|
||||||
|
<xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
|
||||||
|
</xsl:element>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="*">
|
<xsl:template match="*">
|
||||||
<xsl:message>
|
<xsl:message>
|
||||||
<xsl:text>no match for element: "</xsl:text>
|
<xsl:text>no match for element: "</xsl:text>
|
||||||
@ -533,6 +537,6 @@ xhtml = '''\
|
|||||||
</xsl:message>
|
</xsl:message>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
</xsl:stylesheet>
|
</xsl:stylesheet>
|
||||||
'''
|
'''
|
||||||
|
@ -55,7 +55,7 @@ class Pict:
|
|||||||
return "}\n"
|
return "}\n"
|
||||||
def __text_func(self, line):
|
def __text_func(self, line):
|
||||||
#tx<nu<__________<true text
|
#tx<nu<__________<true text
|
||||||
return line[18:]
|
return line[17:]
|
||||||
def __make_dir(self):
|
def __make_dir(self):
|
||||||
""" Make a dirctory to put the image data in"""
|
""" Make a dirctory to put the image data in"""
|
||||||
base_name = os.path.basename(getattr(self.__orig_file, 'name',
|
base_name = os.path.basename(getattr(self.__orig_file, 'name',
|
||||||
|
@ -112,7 +112,8 @@ class SendEmail(QWidget, Ui_Form):
|
|||||||
self.relay_tls.setChecked(True)
|
self.relay_tls.setChecked(True)
|
||||||
|
|
||||||
info_dialog(self, _('Finish gmail setup'),
|
info_dialog(self, _('Finish gmail setup'),
|
||||||
_('Dont forget to enter your gmail username and password')).exec_()
|
_('Dont forget to enter your gmail username and password. '
|
||||||
|
'You can sign up for a free gmail account at http://gmail.com')).exec_()
|
||||||
self.relay_username.setFocus(Qt.OtherFocusReason)
|
self.relay_username.setFocus(Qt.OtherFocusReason)
|
||||||
self.relay_username.setCursorPosition(0)
|
self.relay_username.setCursorPosition(0)
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
import re
|
import re
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Newsweek(BasicNewsRecipe):
|
class Newsweek(BasicNewsRecipe):
|
||||||
@ -128,3 +129,39 @@ class Newsweek(BasicNewsRecipe):
|
|||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
|
||||||
|
def postprocess_book(self, oeb, opts, log):
    """
    Fill in missing author and description entries in the table of contents.

    For every article in every section of ``oeb.toc`` whose ``author`` or
    ``description`` is ``None``, the value is derived from the markup of the
    manifest item the article's ``href`` points to.

    :param oeb: The book being post-processed; its ``toc`` is updated in
                place and its ``manifest.hrefs`` is used to look up items.
    :param opts: Unused here.
    :param log: Unused here.
    """

    def parse(href):
        # Parse the string form of the manifest item registered under href
        return BeautifulSoup(str(oeb.manifest.hrefs[href]))

    def text_of(tag):
        # Text content of tag, or the empty string when it was not found
        return self.tag_to_string(tag) if tag is not None else ''

    def extract_byline(href):
        # "<author info> | <issue date>", or whichever of the two is present
        soup = parse(href)
        byline = text_of(soup.find(True, attrs={'class': 'authorInfo'}))
        issue_date = text_of(soup.find(True, attrs={'class': 'issueDate'}))
        issue_date = re.sub(',', '', issue_date)
        if byline and issue_date:
            return byline + ' | ' + issue_date
        return byline + issue_date

    def extract_description(href):
        soup = parse(href)
        meta = soup.find(True, attrs={'name': 'description'})
        if meta is not None and meta.has_key('content'):
            content = meta['content']
            # Content starting with this text is not used as the
            # description; the first paragraph is used instead.
            if not content.startswith('Newsweek magazine online plus'):
                return content
        # Fallback: the text of the first <p> in the document.
        # NOTE(review): this searches the whole document, not only the
        # element with class "story" - confirm that this is intended.
        return self.tag_to_string(soup.find('p'))

    for section in oeb.toc:
        for article in section:
            if article.author is None:
                article.author = extract_byline(article.href)
            if article.description is None:
                article.description = extract_description(article.href)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user