RTF Output: Newline characters should be turned into spaces, not ignored. Add support for two SD cards in the Droid

This commit is contained in:
Kovid Goyal 2010-05-29 10:57:56 -06:00
commit df2a4220df
4 changed files with 19 additions and 2 deletions

View File

@ -10,7 +10,7 @@ import time
from calibre import entity_to_unicode
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
Comment, BeautifulStoneSoup
Comment, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
@ -86,6 +86,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
'subNavigation tabContent active',
'subNavigation tabContent active clearfix',
]}),
dict(id=[
@ -94,6 +95,7 @@ class NYTimes(BasicNewsRecipe):
'articleExtras',
'articleInline',
'blog_sidebar',
'businessSearchBar',
'cCol',
'entertainmentSearchBar',
'footer',
@ -101,6 +103,7 @@ class NYTimes(BasicNewsRecipe):
'header_search',
'login',
'masthead',
'masthead-nav',
'memberTools',
'navigation',
'portfolioInline',

View File

@ -74,6 +74,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
'subNavigation tabContent active',
'subNavigation tabContent active clearfix',
]}),
dict(id=[

View File

@ -44,6 +44,7 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', 'GT-I5700']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE']
OSX_MAIN_MEM = 'HTC Android Phone Media'

View File

@ -19,6 +19,8 @@ except ImportError:
import cStringIO
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.stylizer import Stylizer
@ -118,13 +120,23 @@ class RTFMLizer(object):
for item in self.oeb_book.spine:
self.log.debug('Converting %s to RTF markup...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
content = self.remove_newlines(content)
output += self.dump_text(etree.fromstring(content), stylizer)
output += self.footer()
output = self.insert_images(output)
output = self.clean_text(output)
return output
def remove_newlines(self, text):
    """Return *text* with every line break replaced by a single space.

    A debug message is written to ``self.log`` before the substitution.
    """
    self.log.debug('\tRemove newlines for processing...')
    # '\r\n' has to be handled first so that a CRLF pair collapses into
    # one space instead of two.
    for line_break in ('\r\n', '\n', '\r'):
        text = text.replace(line_break, ' ')
    return text
def header(self):
    """Return the opening RTF markup for the book.

    The string starts the ``\\rtf1`` group, embeds an ``\\info`` group
    holding the book's first title entry and its creators (formatted by
    ``authors_to_string``), and then emits the ansi / code page / default
    font / default language control words.
    """
    metadata = self.oeb_book.metadata
    title = metadata.title[0].value
    authors = authors_to_string([creator.value for creator in metadata.creator])
    # NOTE(review): the outer '{\rtf1' group is left open here; presumably
    # footer() closes it -- confirm.
    return u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033' % (title, authors)