From e13c15f8e6a7bbd09a0035daf0d0d2a1de59b5a4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 1 Apr 2009 13:03:17 -0700 Subject: [PATCH 1/8] Fix bug causing dates for downloaded news to be in Spanish. Fixes #2198 (Bug fix of recipe La Prensa - Nicaragua) --- .../web/feeds/recipes/recipe_laprensa_ni.py | 25 +++---------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_laprensa_ni.py b/src/calibre/web/feeds/recipes/recipe_laprensa_ni.py index 7a137ff07e..c208640283 100644 --- a/src/calibre/web/feeds/recipes/recipe_laprensa_ni.py +++ b/src/calibre/web/feeds/recipes/recipe_laprensa_ni.py @@ -6,7 +6,7 @@ __copyright__ = '2009, Darko Miletic ' laprensa.com.ni ''' -import locale +import datetime import time from calibre.web.feeds.news import BasicNewsRecipe @@ -23,23 +23,9 @@ class LaPrensa_ni(BasicNewsRecipe): encoding = 'cp1252' remove_javascript = True language = _('Spanish') - - #Locale setting to get appropriate date/month values in Spanish - try: - #Windows seting for locale - locale.setlocale(locale.LC_TIME,'Spanish_Nicaragua') - except locale.Error: - #Linux setting for locale -- choose one appropriate for your distribution - try: - locale.setlocale(locale.LC_TIME,'es_NI') - except locale.Error: - try: - locale.setlocale(locale.LC_TIME,'es_ES') - except: - pass - - - current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/%B/%d/noticias/") + months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre'] + current_month = months_es[datetime.date.today().month - 1] + current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/" + current_month + "/%d/noticias/") html2lrf_options = [ '--comment', description @@ -91,6 +77,3 @@ class LaPrensa_ni(BasicNewsRecipe): totalfeeds.append((feedtitle, articles)) return totalfeeds - def cleanup(self): - #Going back to the default locale - locale.setlocale(locale.LC_TIME,'') From d7e9ca4bee2f2609c0ff636f1aacf052c5f08607 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 1 Apr 2009 20:42:02 -0700 Subject: [PATCH 2/8] MOBI Input:Fix passthrough of TOC to conversion pipeline --- src/calibre/ebooks/conversion/plumber.py | 2 +- src/calibre/ebooks/mobi/reader.py | 20 +++++++++++++------- src/calibre/ebooks/oeb/output.py | 2 +- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index c2fefa29db..6142cb555a 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -339,7 +339,7 @@ OptionRecommendation(name='language', trimmer = ManifestTrimmer() trimmer(self.oeb, self.opts) - self.log.info('Creating %s output...'%self.output_plugin.name) + self.log.info('Creating %s...'%self.output_plugin.name) self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts, self.log) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index fcd09d13c7..a78b5085d9 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -266,12 +266,14 @@ class MobiReader(object): parse_cache[htmlfile] = root self.htmlfile = htmlfile ncx = cStringIO.StringIO() - opf = self.create_opf(htmlfile, guide, root) + opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root) self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf' - opf.render(open(self.created_opf_path, 'wb'), ncx) + opf.render(open(self.created_opf_path, 'wb'), ncx, + ncx_manifest_entry=ncx_manifest_entry) ncx = ncx.getvalue() if ncx: - open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) + ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx') + open(ncx_path, 'wb').write(ncx) with open('styles.css', 'wb') as s: s.write(self.base_css_rules+'\n\n') @@ -284,8 +286,9 @@ class MobiReader(object): if self.book_header.exth is not None or self.embedded_mi is not None: self.log.debug('Creating OPF...') ncx = cStringIO.StringIO() - opf = self.create_opf(htmlfile, guide, root) - opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx) + opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root) + opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx, + ncx_manifest_entry ) ncx = ncx.getvalue() if ncx: open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) @@ -434,7 +437,10 @@ class MobiReader(object): for ref in opf.guide: if ref.type.lower() == 'toc': toc = ref.href() + + ncx_manifest_entry = None if toc: + ncx_manifest_entry = 'toc.ncx' elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1]) tocobj = None ent_pat = re.compile(r'&(\S+?);') @@ -461,7 +467,7 @@ class MobiReader(object): if tocobj is not None: opf.set_toc(tocobj) - return opf + return opf, ncx_manifest_entry def sizeof_trailing_entries(self, data): @@ -589,7 +595,7 @@ def get_metadata(stream): if mr.book_header.exth is None: mi = MetaInformation(mr.name, [_('Unknown')]) else: - mi = mr.create_opf('dummy.html') + mi = mr.create_opf('dummy.html')[0] try: if hasattr(mr.book_header.exth, 'cover_offset'): cover_index = mr.book_header.first_image_index + \ diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py index 2716ff57cd..fc1366fbcd 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/oeb/output.py @@ -44,7 +44,7 @@ class OEBOutput(OutputFormatPlugin): else: raw = etree.tostring(raw, encoding='utf-8', pretty_print=opts.pretty_print) - raw = raw + '\n' + raw = '\n'+raw if isinstance(raw, unicode): raw = raw.encode('utf-8') with open(path, 'wb') as f: From ba816d3efc046d3a5b6ebcb2c44b09903d438cb6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Apr 2009 10:07:19 -0700 Subject: [PATCH 3/8] Fix stream descrambling in lrf2lrs --- src/calibre/ebooks/lrf/objects.py | 414 +++++++++++++++--------------- 1 file changed, 209 insertions(+), 205 deletions(-) diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py index ff9edc4e47..0693b9a508 100644 --- a/src/calibre/ebooks/lrf/objects.py +++ b/src/calibre/ebooks/lrf/objects.py @@ -15,13 +15,13 @@ ruby_tags = { } class LRFObject(object): - + tag_map = { 0xF500: ['', ''], 0xF502: ['infoLink', 'D'], 0xF501: ['', ''], } - + @classmethod def descramble_buffer(cls, buf, l, xorKey): i = 0 @@ -35,7 +35,7 @@ class LRFObject(object): @classmethod def parse_empdots(self, tag, f): self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.contents - + @staticmethod def tag_to_val(h, obj, tag, stream): val = None @@ -45,7 +45,7 @@ class LRFObject(object): val = tag.word elif h[1] == 'w': val = tag.word - if val > 0x8000: + if val > 0x8000: val -= 0x10000 elif h[1] == 'B': val = tag.byte @@ -56,44 +56,44 @@ class LRFObject(object): if len(h) > 2: val = h[2](val) if callable(h[2]) else h[2][val] return val - + def __init__(self, document, stream, id, scramble_key, boundary): self._scramble_key = scramble_key self._document = document self.id = id - + while stream.tell() < boundary: tag = Tag(stream) self.handle_tag(tag, stream) - + def parse_bg_image(self, tag, f): self.bg_image_mode, self.bg_image_id = struct.unpack(" 0x400 and (isinstance(self, ImageStream) or isinstance(self, Font) or isinstance(self, SoundStream)): - l = 0x400; + l = 0x400 self.stream = self.descramble_buffer(self.stream, l, key) if self.stream_flags & 0x100 !=0: decomp_size = struct.unpack("\n' return s - + def as_dict(self): d = {} for h in self.tag_map.values(): @@ -232,7 +236,7 @@ class PageAttr(StyleObject, LRFObject): 0xF529: ['', "parse_bg_image"], } tag_map.update(LRFObject.tag_map) - + @classmethod def to_css(cls, obj, inline=False): return '' @@ -241,79 +245,79 @@ class PageAttr(StyleObject, LRFObject): class Color(object): def __init__(self, val): self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF - + def __unicode__(self): return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) - + def __str__(self): return unicode(self) - - def __len__(self): + + def __len__(self): return 4 - + def __getitem__(self, i): # Qt compatible ordering and values return (self.r, self.g, self.b, 0xff-self.a)[i] # In Qt 0xff is opaque while in LRS 0x00 is opaque - + def to_html(self): return 'rgb(%d, %d, %d)'%(self.r, self.g, self.b) - + class EmptyPageElement(object): def __iter__(self): for i in range(0): yield i - + def __str__(self): return unicode(self) class PageDiv(EmptyPageElement): - + def __init__(self, pain, spacesize, linewidth, linecolor): self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth self.linecolor = Color(linecolor) - + def __unicode__(self): return u'\n\n'%\ (self.pain, self.spacesize, self.linewidth, self.color) - - + + class RuledLine(EmptyPageElement): - + linetype_map = {0x00: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted', 0x13: 'unknown13'} - + def __init__(self, linelength, linetype, linewidth, linecolor): self.linelength, self.linewidth = linelength, linewidth self.linetype = self.linetype_map[linetype] self.linecolor = Color(linecolor) self.id = -1 - + def __unicode__(self): return u'\n\n'%\ (self.linelength, self.linetype, self.linewidth, self.linecolor) - + class Wait(EmptyPageElement): - + def __init__(self, time): self.time = time - + def __unicode__(self): return u'\n\n'%(self.time) - + class Locate(EmptyPageElement): - + pos_map = {1:'bottomleft', 2:'bottomright',3:'topright',4:'topleft', 5:'base'} - + def __init__(self, pos): self.pos = self.pos_map[pos] - + def __unicode__(self): return u'\n\n'%(self.pos) - + class BlockSpace(EmptyPageElement): - + def __init__(self, xspace, yspace): self.xspace, self.yspace = xspace, yspace - + def __unicode__(self): return u'\n\n'%\ (self.xspace, self.yspace) @@ -332,7 +336,7 @@ class Page(LRFStream): evenfooter = property(fget=lambda self : self._document.objects[self.style.evenfooterid]) oddheader = property(fget=lambda self : self._document.objects[self.style.oddheaderid]) oddfooter = property(fget=lambda self : self._document.objects[self.style.oddfooterid]) - + class Content(LRFContentObject): tag_map = { 0xF503: 'link', @@ -344,44 +348,44 @@ class Page(LRFStream): 0xF5D4: 'wait', 0xF5D6: 'sound_stop', } - + def __init__(self, bytes, objects): self.in_blockspace = False LRFContentObject.__init__(self, bytes, objects) - + def link(self, tag): self.close_blockspace() self._contents.append(self.objects[tag.dword]) - + def page_div(self, tag): self.close_blockspace() pars = struct.unpack("\n'%(self.style_id, self.id) for i in self: s += unicode(i) s += '\n\n' return s - + def __str__(self): return unicode(self) - + def to_html(self): s = u'' for i in self: s += i.to_html() return s - - + + class BlockAttr(StyleObject, LRFObject): tag_map = { @@ -443,7 +447,7 @@ class BlockAttr(StyleObject, LRFObject): 0xF529: ['', 'parse_bg_image'], } tag_map.update(LRFObject.tag_map) - + @classmethod def to_css(cls, obj, inline=False): ans = '' @@ -451,7 +455,7 @@ class BlockAttr(StyleObject, LRFObject): ans += '' if inline else '\t' ans += line ans += ' ' if inline else '\n' - + if hasattr(obj, 'sidemargin'): margin = str(obj.sidemargin) + 'px' item('margin-left: %(m)s; margin-right: %(m)s;'%dict(m=margin)) @@ -465,13 +469,13 @@ class BlockAttr(StyleObject, LRFObject): item('border-color: %s;'%obj.framecolor.to_html()) if hasattr(obj, 'bgcolor') and obj.bgcolor.a < 255: item('background-color: %s;'%obj.bgcolor.to_html()) - + return ans - - + + class TextCSS(object): - + @classmethod def to_css(cls, obj, inline=False): ans = '' @@ -479,7 +483,7 @@ class TextCSS(object): ans += '' if inline else '\t' ans += line ans += ' ' if inline else '\n' - + fs = getattr(obj, 'fontsize', None) if fs is not None: item('font-size: %fpt;'%(int(fs)/10.)) @@ -508,18 +512,18 @@ class TextCSS(object): pi = getattr(obj, 'parindent', None) if pi is not None: item('text-indent: %fpt;'%(int(pi)/10.)) - + return ans - - + + class TextAttr(StyleObject, LRFObject, TextCSS): - + FONT_MAP = collections.defaultdict(lambda : 'serif') for key, value in PRS500_PROFILE.default_fonts.items(): FONT_MAP[value] = key - + tag_map = { 0xF511: ['fontsize', 'w'], 0xF512: ['fontwidth', 'w'], @@ -544,8 +548,8 @@ class TextAttr(StyleObject, LRFObject, TextCSS): } tag_map.update(ruby_tags) tag_map.update(LRFObject.tag_map) - - + + class Block(LRFStream, TextCSS): tag_map = { @@ -556,10 +560,10 @@ class Block(LRFStream, TextCSS): tag_map.update(LRFStream.tag_map) extra_attrs = [i[0] for i in BlockAttr.tag_map.values()] extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()]) - + style = property(fget=lambda self : self._document.objects[self.style_id]) textstyle = property(fget=lambda self : self._document.objects[self.textstyle_id]) - + def initialize(self): self.attrs = {} stream = cStringIO.StringIO(self.stream) @@ -572,7 +576,7 @@ class Block(LRFStream, TextCSS): self.textstyle_id = obj.style_id elif isinstance(obj, Text): self.name = 'TextBlock' - self.textstyle_id = obj.style_id + self.textstyle_id = obj.style_id elif isinstance(obj, Image): self.name = 'ImageBlock' for attr in ('x0', 'x1', 'y0', 'y1', 'xsize', 'ysize', 'refstream'): @@ -582,14 +586,14 @@ class Block(LRFStream, TextCSS): self.name = 'ButtonBlock' else: raise LRFParseError("Unexpected block type: "+obj.__class__.__name__) - + self.content = obj - - + + for attr in self.extra_attrs: if hasattr(self, attr): self.attrs[attr] = getattr(self, attr) - + def __unicode__(self): s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) if hasattr(self, 'textstyle_id'): @@ -602,12 +606,12 @@ class Block(LRFStream, TextCSS): s += '\n'%(self.name,) return s return s.rstrip() + ' />\n' - + def to_html(self): if self.name == 'TextBlock': return u'
%s
'%(self.style_id, self.textstyle_id, self.content.to_html()) return u'' - + class MiniPage(LRFStream): tag_map = { @@ -625,12 +629,12 @@ class Text(LRFStream): } tag_map.update(TextAttr.tag_map) tag_map.update(LRFStream.tag_map) - + style = property(fget=lambda self : self._document.objects[self.style_id]) - + text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'\'', 0x3c: u'<', 0x3e: u'>' } entity_pattern = re.compile(r'&(\S+?);') - + text_tags = { 0xF581: ['simple_container', 'Italic'], 0xF582: 'end_container', @@ -668,47 +672,47 @@ class Text(LRFStream): 0xF5D1: 'plot', 0xF5D2: 'cr', } - + class TextTag(object): - + def __init__(self, name, attrs={}, self_closing=False): self.name = name self.attrs = attrs self.self_closing = self_closing - + def __unicode__(self): s = u'<%s '%(self.name,) for name, val in self.attrs.items(): s += '%s="%s" '%(name, val) return s.rstrip() + (u' />' if self.self_closing else u'>') - + def to_html(self): s = u'' return s - + def close_html(self): return u'' - + class Span(TextTag): pass - - + + linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'} adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} lineposition_map = {1:'before', 2:'after'} - + def add_text(self, text): s = unicode(text, "utf-16-le") if s: s = s.translate(self.text_map) self.content.append(self.entity_pattern.sub(entity_to_unicode, s)) - + def end_container(self, tag, stream): self.content.append(None) - + def start_para(self, tag, stream): self.content.append(self.__class__.TextTag('P')) - + def close_containers(self, start=0): if len(self.content) == 0: return @@ -723,8 +727,8 @@ class Text(LRFStream): open_containers += 1 start += 1 self.content.extend(None for i in range(open_containers)) - - + + def end_para(self, tag, stream): i = len(self.content)-1 while i > -1: @@ -732,22 +736,22 @@ class Text(LRFStream): break i -= 1 self.close_containers(start=i) - + def cr(self, tag, stream): self.content.append(self.__class__.TextTag('CR', self_closing=True)) - + def char_button(self, tag, stream): - self.content.append(self.__class__.TextTag( + self.content.append(self.__class__.TextTag( 'CharButton', attrs={'refobj':tag.dword})) - + def simple_container(self, tag, name): self.content.append(self.__class__.TextTag(name)) - + def empline(self, tag, stream): def invalid(op): stream.seek(op) #self.simple_container(None, 'EmpLine') - + oldpos = stream.tell() try: t = Tag(stream) @@ -759,7 +763,7 @@ class Text(LRFStream): h = TextAttr.tag_map[t.id] attrs = {} attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) - oldpos = stream.tell() + oldpos = stream.tell() try: t = Tag(stream) if t.id not in [0xF579, 0xF57A]: @@ -768,31 +772,31 @@ class Text(LRFStream): attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) except LRFParseError: stream.seek(oldpos) - + if attrs: - self.content.append(self.__class__.TextTag( + self.content.append(self.__class__.TextTag( 'EmpLine', attrs=attrs)) - + def space(self, tag, stream): - self.content.append(self.__class__.TextTag('Space', - attrs={'xsize':tag.sword}, - self_closing=True)) - + self.content.append(self.__class__.TextTag('Space', + attrs={'xsize':tag.sword}, + self_closing=True)) + def plot(self, tag, stream): xsize, ysize, refobj, adjustment = struct.unpack("= start_pos: if tag_pos > start_pos: @@ -833,9 +837,9 @@ class Text(LRFStream): self.add_text(self.stream) stream.seek(0, 2) break - + tag = Tag(stream) - + if tag.id == 0xF5CC: self.add_text(stream.read(tag.word)) elif tag.id in self.__class__.text_tags: # A Text tag @@ -857,9 +861,9 @@ class Text(LRFStream): self.content.append(self.__class__.Span('Span', {name:val})) current_style[name] = val if len(self.content) > 0: - self.close_containers() + self.close_containers() self.stream = None - + def __unicode__(self): s = u'' open_containers = collections.deque() @@ -868,16 +872,16 @@ class Text(LRFStream): s += c elif c is None: p = open_containers.pop() - s += u''%(p.name,) + s += u''%(p.name,) else: s += unicode(c) - if not c.self_closing: + if not c.self_closing: open_containers.append(c) - + if len(open_containers) > 0: - raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) + raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) return s - + def to_html(self): s = u'' open_containers = collections.deque() @@ -889,19 +893,19 @@ class Text(LRFStream): if c.name == 'P': in_p = False p = open_containers.pop() - s += p.close_html() + s += p.close_html() else: if c.name == 'P': in_p = True elif c.name == 'CR': s += '
' if in_p else '

' - else: + else: s += c.to_html() - if not c.self_closing: + if not c.self_closing: open_containers.append(c) - + if len(open_containers) > 0: - raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) + raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) return s @@ -909,30 +913,30 @@ class Image(LRFObject): tag_map = { 0xF54A: ['', 'parse_image_rect'], 0xF54B: ['', 'parse_image_size'], - 0xF54C: ['refstream', 'D'], + 0xF54C: ['refstream', 'D'], 0xF555: ['comment', 'P'], } - + def parse_image_rect(self, tag, f): self.x0, self.y0, self.x1, self.y1 = struct.unpack("\n'%\ (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) class PutObj(EmptyPageElement): - + def __init__(self, objects, x1, y1, refobj): self.x1, self.y1, self.refobj = x1, y1, refobj self.object = objects[refobj] - + def __unicode__(self): return u''%(self.x1, self.y1, self.refobj) @@ -949,14 +953,14 @@ class Canvas(LRFStream): 0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}], } tag_map.update(LRFStream.tag_map) - extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout', + extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout', 'framewidth', 'framecolor', 'framemode'] - + def parse_waits(self, tag, f): val = tag.word self.setwaitprop = val&0xF self.setwaitsync = val&0xF0 - + def initialize(self): self.attrs = {} for attr in self.extra_attrs: @@ -967,7 +971,7 @@ class Canvas(LRFStream): while stream.tell() < len(self.stream): tag = Tag(stream) self._contents.append(PutObj(self._document.objects, *struct.unpack("\n'%\ (self.id, self.encoding, self.file) @@ -1036,55 +1040,55 @@ class Button(LRFObject): 0xF5F9: ['','parse_run'], #Run } tag_map.update(LRFObject.tag_map) - + def __init__(self, document, stream, id, scramble_key, boundary): self.xml = u'' self.refimage = {} self.actions = {} self.to_dump = True LRFObject.__init__(self, document, stream, id, scramble_key, boundary) - + def do_ref_image(self, tag, f): self.refimage[self.button_type] = tag.dword - + def do_base_button(self, tag, f): self.button_type = 0 self.actions[self.button_type] = [] - + def do_focus_in_button(self, tag, f): self.button_type = 1 - + def do_push_button(self, tag, f): self.button_type = 2 - + def do_up_button(self, tag, f): self.button_type = 3 - + def do_start_actions(self, tag, f): self.actions[self.button_type] = [] - + def parse_jump_to(self, tag, f): self.actions[self.button_type].append((1, struct.unpack("\n'%(self.id,) if self.button_flags & 0x10 != 0: @@ -1098,10 +1102,10 @@ class Button(LRFObject): raise LRFParseError('Unsupported button type') s += '\n' return s - + refpage = property(fget=lambda self : self.jump_action(2)[0]) refobj = property(fget=lambda self : self.jump_action(2)[1]) - + class Window(LRFObject): pass @@ -1122,12 +1126,12 @@ class Font(LRFStream): } tag_map.update(LRFStream.tag_map) data = property(fget=lambda self: self.stream) - + def end_stream(self, *args): LRFStream.end_stream(self, *args) self._document.font_map[self.fontfacename] = self self.file = self.fontfacename + '.ttf' - + def __unicode__(self): s = '\n'%\ (self.id, self.fontfilename, self.fontfacename, self.file) @@ -1146,20 +1150,20 @@ class BookAttr(StyleObject, LRFObject): tag_map.update(ruby_tags) tag_map.update(LRFObject.tag_map) binding_map = {1: 'Lr', 16 : 'Rl'} - + def __init__(self, document, stream, id, scramble_key, boundary): self.font_link_list = [] LRFObject.__init__(self, document, stream, id, scramble_key, boundary) - + def add_font(self, tag, f): self.font_link_list.append(tag.dword) - + def __unicode__(self): s = u'\n'%(self.id, self.id) s += u'\n'%(self._tags_to_xml(),) doc = self._document s += u'\n'%\ - (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth) + (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth) for font in self._document.font_map.values(): s += unicode(font) s += '\n' @@ -1169,15 +1173,15 @@ class SimpleText(Text): pass class TocLabel(object): - + def __init__(self, refpage, refobject, label): self.refpage, self.refobject, self.label = refpage, refobject, label - + def __unicode__(self): return u'%s\n'%(self.refpage, self.refobject, self.label) class TOCObject(LRFStream): - + def initialize(self): stream = cStringIO.StringIO(self.stream) c = struct.unpack("\n' for i in self: s += unicode(i) return s + '\n' - + object_map = [ None, #00 @@ -1246,7 +1250,7 @@ def get_object(document, stream, id, offset, size, scramble_key): obj_id, obj_type = struct.unpack(" Date: Thu, 2 Apr 2009 10:55:59 -0700 Subject: [PATCH 4/8] Fix #2200 (Recipe update: Exiled online) --- src/calibre/web/feeds/recipes/recipe_exiled.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_exiled.py b/src/calibre/web/feeds/recipes/recipe_exiled.py index e70e047e0d..017c9ea26e 100644 --- a/src/calibre/web/feeds/recipes/recipe_exiled.py +++ b/src/calibre/web/feeds/recipes/recipe_exiled.py @@ -13,7 +13,6 @@ class Exiled(BasicNewsRecipe): __author__ = 'Darko Miletic' description = "Mankind's only alternative since 1997 - Formerly known as The eXile" publisher = 'Exiled Online' - language = _('English') category = 'news, politics, international' oldest_article = 15 max_articles_per_feed = 100 @@ -21,16 +20,18 @@ class Exiled(BasicNewsRecipe): use_embedded_content = False encoding = 'utf8' remove_javascript = True + language = _('English') cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif' html2lrf_options = [ '--comment' , description + , '--base-font-size', '10' , '--category' , category , '--publisher' , publisher ] - + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - + keep_only_tags = [dict(name='div', attrs={'id':'main'})] remove_tags = [ @@ -39,8 +40,8 @@ class Exiled(BasicNewsRecipe): ,dict(name='div', attrs={'id':['comments','navig']}) ] - - feeds = [(u'Articles', u'http://exiledonline.com/feed/' )] + + feeds = [(u'Articles', u'http://exiledonline.com/feed/')] def preprocess_html(self, soup): for item in soup.findAll(style=True): @@ -48,4 +49,9 @@ class Exiled(BasicNewsRecipe): mtag = '\n\n\n' soup.head.insert(0,mtag) return soup - + + def get_article_url(self, article): + raw = article.get('link', None) + final = raw + 'all/1/' + return final + From 9eceea476256baa6d630973c33134a92bdd676a0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Apr 2009 11:00:00 -0700 Subject: [PATCH 5/8] IGN:Basic blog --- src/calibre/www/apps/blog/models.py | 4 ++-- src/calibre/www/kovid/settings.py | 4 ++-- src/calibre/www/kovid/urls.py | 8 +++++--- src/{calibre/www/apps => }/tagging/__init__.py | 2 +- src/{calibre/www/apps => }/tagging/admin.py | 2 +- src/{calibre/www/apps => }/tagging/fields.py | 8 ++++---- src/{calibre/www/apps => }/tagging/forms.py | 6 +++--- src/{calibre/www/apps => }/tagging/generic.py | 0 src/{calibre/www/apps => }/tagging/managers.py | 2 +- src/{calibre/www/apps => }/tagging/models.py | 6 +++--- src/{calibre/www/apps => }/tagging/settings.py | 0 .../www/apps => }/tagging/templatetags/__init__.py | 0 .../www/apps => }/tagging/templatetags/tagging_tags.py | 0 src/{calibre/www/apps => }/tagging/utils.py | 4 ++-- src/{calibre/www/apps => }/tagging/views.py | 4 ++-- 15 files changed, 26 insertions(+), 24 deletions(-) rename src/{calibre/www/apps => }/tagging/__init__.py (90%) rename src/{calibre/www/apps => }/tagging/admin.py (60%) rename src/{calibre/www/apps => }/tagging/fields.py (94%) rename src/{calibre/www/apps => }/tagging/forms.py (88%) rename src/{calibre/www/apps => }/tagging/generic.py (100%) rename src/{calibre/www/apps => }/tagging/managers.py (97%) rename src/{calibre/www/apps => }/tagging/models.py (98%) rename src/{calibre/www/apps => }/tagging/settings.py (100%) rename src/{calibre/www/apps => }/tagging/templatetags/__init__.py (100%) rename src/{calibre/www/apps => }/tagging/templatetags/tagging_tags.py (100%) rename src/{calibre/www/apps => }/tagging/utils.py (98%) rename src/{calibre/www/apps => }/tagging/views.py (93%) diff --git a/src/calibre/www/apps/blog/models.py b/src/calibre/www/apps/blog/models.py index b719d8cb8a..16eab441b2 100644 --- a/src/calibre/www/apps/blog/models.py +++ b/src/calibre/www/apps/blog/models.py @@ -2,10 +2,10 @@ from django.db import models from django.utils.translation import ugettext_lazy as _ from django.db.models import permalink from django.contrib.auth.models import User -from calibre.www.apps.tagging.fields import TagField +from tagging.fields import TagField from calibre.www.apps.blog.managers import PublicManager -import calibre.www.apps.tagging as tagging +import tagging class Category(models.Model): """Category model.""" diff --git a/src/calibre/www/kovid/settings.py b/src/calibre/www/kovid/settings.py index 8d89c5e373..3b1899bfdf 100644 --- a/src/calibre/www/kovid/settings.py +++ b/src/calibre/www/kovid/settings.py @@ -40,10 +40,10 @@ INSTALLED_APPS = ( 'django.contrib.sites', 'django.contrib.admin', 'django.contrib.comments', + 'django.contrib.markup', 'calibre.www.apps.inlines', - 'calibre.www.apps.tagging', + 'tagging', 'calibre.www.apps.blog', - ) diff --git a/src/calibre/www/kovid/urls.py b/src/calibre/www/kovid/urls.py index e5a33b2597..05b2e451b0 100644 --- a/src/calibre/www/kovid/urls.py +++ b/src/calibre/www/kovid/urls.py @@ -2,14 +2,16 @@ from django.conf.urls.defaults import patterns, include, handler404, handler500 from django.conf import settings # Uncomment the next two lines to enable the admin: -#from django.contrib import admin -#admin.autodiscover() +from django.contrib import admin +admin.autodiscover() urlpatterns = patterns('', - # (r'^admin/(.*)', admin.site.root), + (r'^admin/(.*)', admin.site.root), (r'^comments/', include('django.contrib.comments.urls')), + (r'', include('calibre.www.apps.blog.urls')), + ) diff --git a/src/calibre/www/apps/tagging/__init__.py b/src/tagging/__init__.py similarity index 90% rename from src/calibre/www/apps/tagging/__init__.py rename to src/tagging/__init__.py index 9cca99b771..9241c202c3 100644 --- a/src/calibre/www/apps/tagging/__init__.py +++ b/src/tagging/__init__.py @@ -1,6 +1,6 @@ from django.utils.translation import ugettext as _ -from calibre.www.apps.tagging.managers import ModelTaggedItemManager, TagDescriptor +from tagging.managers import ModelTaggedItemManager, TagDescriptor VERSION = (0, 3, 'pre') diff --git a/src/calibre/www/apps/tagging/admin.py b/src/tagging/admin.py similarity index 60% rename from src/calibre/www/apps/tagging/admin.py rename to src/tagging/admin.py index 51cb472e50..4c1ef38546 100644 --- a/src/calibre/www/apps/tagging/admin.py +++ b/src/tagging/admin.py @@ -1,5 +1,5 @@ from django.contrib import admin -from calibre.www.apps.tagging.models import Tag, TaggedItem +from tagging.models import Tag, TaggedItem admin.site.register(TaggedItem) admin.site.register(Tag) diff --git a/src/calibre/www/apps/tagging/fields.py b/src/tagging/fields.py similarity index 94% rename from src/calibre/www/apps/tagging/fields.py rename to src/tagging/fields.py index f6bb9fce15..f52daff0b2 100644 --- a/src/calibre/www/apps/tagging/fields.py +++ b/src/tagging/fields.py @@ -5,9 +5,9 @@ from django.db.models import signals from django.db.models.fields import CharField from django.utils.translation import ugettext_lazy as _ -from calibre.www.apps.tagging import settings -from calibre.www.apps.tagging.models import Tag -from calibre.www.apps.tagging.utils import edit_string_for_tags +from tagging import settings +from tagging.models import Tag +from tagging.utils import edit_string_for_tags class TagField(CharField): """ @@ -101,7 +101,7 @@ class TagField(CharField): return 'CharField' def formfield(self, **kwargs): - from calibre.www.apps.tagging import forms + from tagging import forms defaults = {'form_class': forms.TagField} defaults.update(kwargs) return super(TagField, self).formfield(**defaults) diff --git a/src/calibre/www/apps/tagging/forms.py b/src/tagging/forms.py similarity index 88% rename from src/calibre/www/apps/tagging/forms.py rename to src/tagging/forms.py index 997ff5c66b..a70ac6bac4 100644 --- a/src/calibre/www/apps/tagging/forms.py +++ b/src/tagging/forms.py @@ -4,9 +4,9 @@ Tagging components for Django's form library. from django import forms from django.utils.translation import ugettext as _ -from calibre.www.apps.tagging import settings -from calibre.www.apps.tagging.models import Tag -from calibre.www.apps.tagging.utils import parse_tag_input +from tagging import settings +from tagging.models import Tag +from tagging.utils import parse_tag_input class AdminTagForm(forms.ModelForm): class Meta: diff --git a/src/calibre/www/apps/tagging/generic.py b/src/tagging/generic.py similarity index 100% rename from src/calibre/www/apps/tagging/generic.py rename to src/tagging/generic.py diff --git a/src/calibre/www/apps/tagging/managers.py b/src/tagging/managers.py similarity index 97% rename from src/calibre/www/apps/tagging/managers.py rename to src/tagging/managers.py index b17269e146..02cd1c23c9 100644 --- a/src/calibre/www/apps/tagging/managers.py +++ b/src/tagging/managers.py @@ -5,7 +5,7 @@ application. from django.contrib.contenttypes.models import ContentType from django.db import models -from calibre.www.apps.tagging.models import Tag, TaggedItem +from tagging.models import Tag, TaggedItem class ModelTagManager(models.Manager): """ diff --git a/src/calibre/www/apps/tagging/models.py b/src/tagging/models.py similarity index 98% rename from src/calibre/www/apps/tagging/models.py rename to src/tagging/models.py index 59fe7682c4..d43f22d964 100644 --- a/src/calibre/www/apps/tagging/models.py +++ b/src/tagging/models.py @@ -13,9 +13,9 @@ from django.db import connection, models from django.db.models.query import QuerySet from django.utils.translation import ugettext_lazy as _ -from calibre.www.apps.tagging import settings -from calibre.www.apps.tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input -from calibre.www.apps.tagging.utils import LOGARITHMIC +from tagging import settings +from tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input +from tagging.utils import LOGARITHMIC qn = connection.ops.quote_name diff --git a/src/calibre/www/apps/tagging/settings.py b/src/tagging/settings.py similarity index 100% rename from src/calibre/www/apps/tagging/settings.py rename to src/tagging/settings.py diff --git a/src/calibre/www/apps/tagging/templatetags/__init__.py b/src/tagging/templatetags/__init__.py similarity index 100% rename from src/calibre/www/apps/tagging/templatetags/__init__.py rename to src/tagging/templatetags/__init__.py diff --git a/src/calibre/www/apps/tagging/templatetags/tagging_tags.py b/src/tagging/templatetags/tagging_tags.py similarity index 100% rename from src/calibre/www/apps/tagging/templatetags/tagging_tags.py rename to src/tagging/templatetags/tagging_tags.py diff --git a/src/calibre/www/apps/tagging/utils.py b/src/tagging/utils.py similarity index 98% rename from src/calibre/www/apps/tagging/utils.py rename to src/tagging/utils.py index 5750c7a0f7..e89bab0e5a 100644 --- a/src/calibre/www/apps/tagging/utils.py +++ b/src/tagging/utils.py @@ -159,7 +159,7 @@ def get_tag_list(tags): * A ``Tag`` ``QuerySet``. """ - from calibre.www.apps.tagging.models import Tag + from tagging.models import Tag if isinstance(tags, Tag): return [tags] elif isinstance(tags, QuerySet) and tags.model is Tag: @@ -201,7 +201,7 @@ def get_tag(tag): If no matching tag can be found, ``None`` will be returned. """ - from calibre.www.apps.tagging.models import Tag + from tagging.models import Tag if isinstance(tag, Tag): return tag diff --git a/src/calibre/www/apps/tagging/views.py b/src/tagging/views.py similarity index 93% rename from src/calibre/www/apps/tagging/views.py rename to src/tagging/views.py index 53360da383..9e7e2f534b 100644 --- a/src/calibre/www/apps/tagging/views.py +++ b/src/tagging/views.py @@ -5,8 +5,8 @@ from django.http import Http404 from django.utils.translation import ugettext as _ from django.views.generic.list_detail import object_list -from calibre.www.apps.tagging.models import Tag, TaggedItem -from calibre.www.apps.tagging.utils import get_tag, get_queryset_and_model +from tagging.models import Tag, TaggedItem +from tagging.utils import get_tag, get_queryset_and_model def tagged_object_list(request, queryset_or_model=None, tag=None, related_tags=False, related_tag_counts=True, **kwargs): From 62688a6822fce9a5af20458cd9e4ccc78e487fe7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Apr 2009 11:07:49 -0700 Subject: [PATCH 6/8] Fix #2193 (PRS-700 epub flow_size crash) --- src/calibre/ebooks/epub/from_html.py | 120 ++++++++++++++------------- 1 file changed, 62 insertions(+), 58 deletions(-) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index 822346453a..318cf5cc02 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -8,24 +8,24 @@ Conversion of HTML/OPF files follows several stages: * All links in the HTML files or in the OPF manifest are followed to build up a list of HTML files to be converted. - This stage is implemented by + This stage is implemented by :function:`calibre.ebooks.html.traverse` and :class:`calibre.ebooks.html.HTMLFile`. - * The HTML is pre-processed to make it more semantic. + * The HTML is pre-processed to make it more semantic. All links in the HTML files to other resources like images, - stylesheets, etc. are relativized. The resources are copied + stylesheets, etc. are relativized. The resources are copied into the `resources` sub directory. This is accomplished by - :class:`calibre.ebooks.html.PreProcessor` and + :class:`calibre.ebooks.html.PreProcessor` and :class:`calibre.ebooks.html.Parser`. * The HTML is processed. Various operations are performed. - All style declarations are extracted and consolidated into + All style declarations are extracted and consolidated into a single style sheet. Chapters are auto-detected and marked. Various font related manipulations are performed. See :class:`HTMLProcessor`. - * The processed HTML is saved and the + * The processed HTML is saved and the :module:`calibre.ebooks.epub.split` module is used to split up large HTML files into smaller chunks. @@ -64,7 +64,7 @@ def remove_bad_link(element, attribute, link, pos): def check_links(opf_path, pretty_print): ''' - Find and remove all invalid links in the HTML files + Find and remove all invalid links in the HTML files ''' logger = logging.getLogger('html2epub') logger.info('\tChecking files for bad links...') @@ -78,7 +78,7 @@ def check_links(opf_path, pretty_print): if isinstance(f, str): f = f.decode('utf-8') html_files.append(os.path.abspath(content(f))) - + for path in html_files: if not os.access(path, os.R_OK): continue @@ -113,27 +113,27 @@ def find_html_index(files): return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:] class HTMLProcessor(Processor, Rationalizer): - + def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets): - Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, + Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='html2epub') if opts.verbose > 2: self.debug_tree('parsed') self.detect_chapters() - + self.extract_css(stylesheets) if self.opts.base_font_size2 > 0: - self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet], + self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet], self.root, self.opts) if opts.verbose > 2: self.debug_tree('nocss') - + if hasattr(self.body, 'xpath'): for script in list(self.body.xpath('descendant::script')): script.getparent().remove(script) - + self.fix_markup() - + def convert_image(self, img): rpath = img.get('src', '') path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/')) @@ -150,10 +150,10 @@ class HTMLProcessor(Processor, Rationalizer): if val == rpath: self.resource_map[key] = rpath+'_calibre_converted.jpg' img.set('src', rpath+'_calibre_converted.jpg') - + def fix_markup(self): ''' - Perform various markup transforms to get the output to render correctly + Perform various markup transforms to get the output to render correctly in the quirky ADE. ''' # Replace
that are children of as ADE doesn't handle them @@ -179,8 +179,8 @@ class HTMLProcessor(Processor, Rationalizer): if not br.tail: br.tail = '' br.tail += sibling.tail - - + + if self.opts.profile.remove_object_tags: for tag in self.root.xpath('//embed'): tag.getparent().remove(tag) @@ -188,42 +188,46 @@ class HTMLProcessor(Processor, Rationalizer): if tag.get('type', '').lower().strip() in ('image/svg+xml',): continue tag.getparent().remove(tag) - - + + for tag in self.root.xpath('//title|//style'): if not tag.text: tag.getparent().remove(tag) for tag in self.root.xpath('//script'): if not tag.text and not tag.get('src', False): tag.getparent().remove(tag) - + for tag in self.root.xpath('//form'): tag.getparent().remove(tag) - + for tag in self.root.xpath('//center'): tag.tag = 'div' tag.set('style', 'text-align:center') - + if self.opts.linearize_tables: for tag in self.root.xpath('//table | //tr | //th | //td'): tag.tag = 'div' - - + + # ADE can't handle & in an img url + for tag in self.root.xpath('//img[@src]'): + tag.set('src', tag.get('src', '').replace('&', '')) + + def save(self): for meta in list(self.root.xpath('//meta')): meta.getparent().remove(meta) # Strip all comments since Adobe DE is petrified of them Processor.save(self, strip_comments=True) - + def remove_first_image(self): images = self.root.xpath('//img') if images: images[0].getparent().remove(images[0]) return True return False - - - + + + def config(defaults=None): return common_config(defaults=defaults) @@ -235,7 +239,7 @@ def option_parser(): Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file. If you specify an OPF file instead of an HTML file, the list of links is takes from -the element of the OPF file. +the element of the OPF file. ''')) def parse_content(filelist, opts, tdir): @@ -246,7 +250,7 @@ def parse_content(filelist, opts, tdir): first_image_removed = False for htmlfile in filelist: logging.getLogger('html2epub').debug('Processing %s...'%htmlfile) - hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'), + hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'), resource_map, filelist, stylesheets) if not first_image_removed and opts.remove_first_image: first_image_removed = hp.remove_first_image() @@ -254,7 +258,7 @@ def parse_content(filelist, opts, tdir): hp.save() stylesheet_map[os.path.basename(hp.save_path())] = \ [s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None] - + logging.getLogger('html2epub').debug('Saving stylesheets...') if opts.base_font_size2 > 0: Rationalizer.remove_font_size_information(stylesheets.values()) @@ -268,7 +272,7 @@ def parse_content(filelist, opts, tdir): if toc.count('chapter') + toc.count('file') > opts.toc_threshold: toc.purge(['link', 'unknown']) toc.purge(['link'], max=opts.max_toc_links) - + return resource_map, hp.htmlfile_map, toc, stylesheet_map TITLEPAGE = '''\ @@ -325,26 +329,26 @@ def process_title_page(mi, filelist, htmlfilemap, opts, tdir): metadata_cover = mi.cover if metadata_cover and not os.path.exists(metadata_cover): metadata_cover = None - + cpath = '/'.join(('resources', '_cover_.jpg')) cover_dest = os.path.join(tdir, 'content', *cpath.split('/')) if metadata_cover is not None: - if not create_cover_image(metadata_cover, cover_dest, + if not create_cover_image(metadata_cover, cover_dest, opts.profile.screen_size): metadata_cover = None specified_cover = opts.cover if specified_cover and not os.path.exists(specified_cover): specified_cover = None if specified_cover is not None: - if not create_cover_image(specified_cover, cover_dest, + if not create_cover_image(specified_cover, cover_dest, opts.profile.screen_size): specified_cover = None - + cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover if cover is not None: titlepage = TITLEPAGE%cpath - tp = 'calibre_title_page.html' if old_title_page is None else old_title_page + tp = 'calibre_title_page.html' if old_title_page is None else old_title_page tppath = os.path.join(tdir, 'content', tp) with open(tppath, 'wb') as f: f.write(titlepage) @@ -370,7 +374,7 @@ def condense_ncx(ncx_path): compressed = etree.tostring(tree.getroot(), encoding='utf-8') open(ncx_path, 'wb').write(compressed) -def convert(htmlfile, opts, notification=None, create_epub=True, +def convert(htmlfile, opts, notification=None, create_epub=True, oeb_cover=False, extract_to=None): htmlfile = os.path.abspath(htmlfile) if opts.output is None: @@ -399,16 +403,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True, else: opf, filelist = get_filelist(htmlfile, opts) mi = merge_metadata(htmlfile, opf, opts) - opts.chapter = XPath(opts.chapter, + opts.chapter = XPath(opts.chapter, namespaces={'re':'http://exslt.org/regular-expressions'}) for x in (1, 2, 3): attr = 'level%d_toc'%x if getattr(opts, attr): - setattr(opts, attr, XPath(getattr(opts, attr), + setattr(opts, attr, XPath(getattr(opts, attr), namespaces={'re':'http://exslt.org/regular-expressions'})) else: - setattr(opts, attr, None) - + setattr(opts, attr, None) + with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir: if opts.keep_intermediate: print 'Intermediate files in', tdir @@ -416,16 +420,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True, parse_content(filelist, opts, tdir) logger = logging.getLogger('html2epub') resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()] - - + + title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir) spine = [htmlfile_map[f.path] for f in filelist] if not oeb_cover and title_page is not None: spine = [title_page] + spine mi.cover = None mi.cover_data = (None, None) - - + + mi = create_metadata(tdir, mi, spine, resources) buf = cStringIO.StringIO() if mi.toc: @@ -453,7 +457,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True, logger.info('\tBuilding page map...') add_page_map(opf_path, opts) check_links(opf_path, opts.pretty_print) - + opf = OPF(opf_path, tdir) opf.remove_guide() oeb_cover_file = None @@ -465,7 +469,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True, opf.add_guide_item('cover', 'Cover', 'content/'+spine[0]) if oeb_cover and oeb_cover_file: opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file) - + cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg') if os.path.exists(cpath): opf.add_path_to_manifest(cpath, 'image/jpeg') @@ -477,29 +481,29 @@ def convert(htmlfile, opts, notification=None, create_epub=True, condense_ncx(ncx_path) if os.stat(ncx_path).st_size > opts.profile.flow_size: logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size) - + if create_epub: epub = initialize_container(opts.output) epub.add_dir(tdir) epub.close() run_plugins_on_postprocess(opts.output, 'epub') logger.info(_('Output written to ')+opts.output) - + if opts.show_opf: print open(opf_path, 'rb').read() - + if opts.extract_to is not None: if os.path.exists(opts.extract_to): shutil.rmtree(opts.extract_to) shutil.copytree(tdir, opts.extract_to) - + if extract_to is not None: if os.path.exists(extract_to): shutil.rmtree(extract_to) shutil.copytree(tdir, extract_to) - - - + + + def main(args=sys.argv): parser = option_parser() opts, args = parser.parse_args(args) @@ -509,6 +513,6 @@ def main(args=sys.argv): return 1 convert(args[1], opts) return 0 - + if __name__ == '__main__': sys.exit(main()) From f7195f81aa72346c9bed4abdce9fd55f3a8e5abe Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Apr 2009 15:18:29 -0700 Subject: [PATCH 7/8] Fix regression in windows that was causing downloaded news to not have a cover --- src/calibre/ebooks/__init__.py | 56 ++++++++++++++++++++++++++++- src/calibre/web/feeds/news.py | 66 ++++++++++++---------------------- 2 files changed, 78 insertions(+), 44 deletions(-) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 8f5912f397..e208b5a688 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -6,8 +6,11 @@ Code for the conversion of ebook formats and the reading of metadata from various formats. ''' +import traceback, os +from calibre import CurrentDir + class ConversionError(Exception): - + def __init__(self, msg, only_msg=False): Exception.__init__(self, msg) self.only_msg = only_msg @@ -22,3 +25,54 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', 'html', 'xhtml', 'pdf', 'prc', 'mobi', 'azw', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip', 'rb', 'imp', 'odt'] + +class HTMLRenderer(object): + + def __init__(self, page, loop): + self.page, self.loop = page, loop + self.data = '' + self.exception = self.tb = None + + def __call__(self, ok): + from PyQt4.Qt import QImage, QPainter, QByteArray, QBuffer + try: + if not ok: + raise RuntimeError('Rendering of HTML failed.') + image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) + image.setDotsPerMeterX(96*(100/2.54)) + image.setDotsPerMeterY(96*(100/2.54)) + painter = QPainter(image) + self.page.mainFrame().render(painter) + painter.end() + ba = QByteArray() + buf = QBuffer(ba) + buf.open(QBuffer.WriteOnly) + image.save(buf, 'JPEG') + self.data = str(ba.data()) + except Exception, e: + self.exception = e + self.traceback = traceback.format_exc() + finally: + self.loop.exit(0) + + +def render_html(path_to_html, width=590, height=750): + from PyQt4.QtWebKit import QWebPage + from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize + path_to_html = os.path.abspath(path_to_html) + with CurrentDir(os.path.dirname(path_to_html)): + page = QWebPage() + pal = page.palette() + pal.setBrush(QPalette.Background, Qt.white) + page.setPalette(pal) + page.setViewportSize(QSize(width, height)) + page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff) + page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff) + loop = QEventLoop() + renderer = HTMLRenderer(page, loop) + + page.connect(page, SIGNAL('loadFinished(bool)'), renderer) + page.mainFrame().load(QUrl.fromLocalFile(path_to_html)) + loop.exec_() + return renderer + diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 5df0a24efe..8f4739c197 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -7,14 +7,12 @@ Defines various abstract base classes that can be subclassed to create powerful __docformat__ = "restructuredtext en" -import logging, os, cStringIO, time, traceback, re, urlparse, sys, tempfile, functools +import logging, os, cStringIO, time, traceback, re, urlparse, sys from collections import defaultdict from functools import partial from contextlib import nested, closing -from PyQt4.Qt import QApplication, QFile, Qt, QPalette, QSize, QImage, QPainter, \ - QBuffer, QByteArray, SIGNAL, QUrl, QEventLoop, QIODevice -from PyQt4.QtWebKit import QWebPage +from PyQt4.Qt import QApplication, QFile, QIODevice from calibre import browser, __appname__, iswindows, LoggingInterface, \ @@ -22,14 +20,15 @@ from calibre import browser, __appname__, iswindows, LoggingInterface, \ from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.lrf import entity_to_unicode +from calibre.ebooks import render_html from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata import MetaInformation from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed from calibre.web.fetch.simple import option_parser as web2disk_option_parser from calibre.web.fetch.simple import RecursiveFetcher from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending -from calibre.ptempfile import PersistentTemporaryFile -from calibre.gui2 import images_rc # Needed for default cover +from calibre.ptempfile import PersistentTemporaryFile, \ + PersistentTemporaryDirectory class BasicNewsRecipe(object, LoggingInterface): @@ -788,15 +787,18 @@ class BasicNewsRecipe(object, LoggingInterface): ''' Create a generic cover for recipes that dont have a cover ''' + from calibre.gui2 import images_rc # Needed for access to logo + images_rc if QApplication.instance() is None: QApplication([]) f = QFile(':/library') f.open(QIODevice.ReadOnly) - img = str(f.readAll()) + img_data = str(f.readAll()) + tdir = PersistentTemporaryDirectory('_default_cover') + img = os.path.join(tdir, 'logo.png') + with open(img, 'wb') as g: + g.write(img_data) f.close() - f = tempfile.NamedTemporaryFile(suffix='library.png') - f.write(img) - f.flush() - img = f.name + img = os.path.basename(img) html= u'''\ @@ -835,38 +837,16 @@ class BasicNewsRecipe(object, LoggingInterface): date=strftime(self.timefmt), app=__appname__ +' '+__version__, img=img) - f2 = tempfile.NamedTemporaryFile(suffix='cover.html') - f2.write(html.encode('utf-8')) - f2.flush() - page = QWebPage() - pal = page.palette() - pal.setBrush(QPalette.Background, Qt.white) - page.setPalette(pal) - page.setViewportSize(QSize(590, 750)) - page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff) - page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff) - loop = QEventLoop() - def render_html(page, loop, ok): - try: - image = QImage(page.viewportSize(), QImage.Format_ARGB32) - image.setDotsPerMeterX(96*(100/2.54)) - image.setDotsPerMeterY(96*(100/2.54)) - painter = QPainter(image) - page.mainFrame().render(painter) - painter.end() - ba = QByteArray() - buf = QBuffer(ba) - buf.open(QBuffer.WriteOnly) - image.save(buf, 'JPEG') - image_data = str(ba.data()) - cover_file.write(image_data) - cover_file.flush() - finally: - loop.exit(0) - - page.connect(page, SIGNAL('loadFinished(bool)'), functools.partial(render_html, page, loop)) - page.mainFrame().load(QUrl.fromLocalFile(f2.name)) - loop.exec_() + hf = os.path.join(tdir, 'cover.htm') + with open(hf, 'wb') as f: + f.write(html.encode('utf-8')) + renderer = render_html(hf) + if renderer.tb is not None: + self.logger.warning('Failed to render default cover') + self.logger.debug(renderer.tb) + else: + cover_file.write(renderer.data) + cover_file.flush() def create_opf(self, feeds, dir=None): From 97d676d3a6a40b8aa634ccfba3a224fb6d7642f8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Apr 2009 15:20:41 -0700 Subject: [PATCH 8/8] version 0.5.5 --- src/calibre/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 9d9fc7310a..04284ab8b3 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.5.4' +__version__ = '0.5.5' __author__ = "Kovid Goyal " ''' Various run time constants.