From 54dd263be1ab436bf55027b469f77e40cf155997 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Apr 2009 09:23:44 -0700 Subject: [PATCH 1/5] IGN:Fix bug in anchor insertion routine where it was inserting an anchor in the wrong place --- src/calibre/ebooks/mobi/reader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index d217f5be6a..1f84c4f5d4 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -587,7 +587,8 @@ class MobiReader(object): if r > -1 and (r < l or l == end or l == -1): p = self.mobi_html.rfind('<', 0, end + 1) if pos < end and p > -1 and \ - not end_tag_re.match(self.mobi_html[p:r]): + not end_tag_re.match(self.mobi_html[p:r]) and \ + not self.mobi_html[p:r+1].endswith('/>'): anchor = ' filepos-id="filepos%d"' end = r else: From 8e68f9d4dd1e630b0ae8391f10c25c6e6d9a1743 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Apr 2009 15:25:25 -0700 Subject: [PATCH 2/5] Fix #2350 (conversion of a mobipocket file (huffdic compressed) fail) --- src/calibre/ebooks/mobi/reader.py | 1 + src/calibre/utils/terminfo.py | 66 ++++++++++++++++--------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 1f84c4f5d4..95a05e69c0 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -382,6 +382,7 @@ class MobiReader(object): } mobi_version = self.book_header.mobi_version for tag in root.iter(etree.Element): + tag.attrib.pop('xmlns', '') if tag.tag in ('country-region', 'place', 'placetype', 'placename', 'state', 'city', 'street', 'address', 'content'): tag.tag = 'div' if tag.tag == 'content' else 'span' diff --git a/src/calibre/utils/terminfo.py b/src/calibre/utils/terminfo.py index 075c0e694d..1cbd4d830a 100644 --- a/src/calibre/utils/terminfo.py +++ b/src/calibre/utils/terminfo.py @@ -7,22 +7,22 @@ import sys, re, os class TerminalController: """ A class that can be used to portably generate formatted output to - a terminal. - + a terminal. + `TerminalController` defines a set of instance variables whose values are initialized to the control sequence necessary to perform a given action. These can be simply included in normal output to the terminal: - + >>> term = TerminalController() >>> print 'This is '+term.GREEN+'green'+term.NORMAL - + Alternatively, the `render()` method can used, which replaces '${action}' with the string required to perform 'action': - + >>> term = TerminalController() >>> print term.render('This is ${GREEN}green${NORMAL}') - + If the terminal doesn't support a given action, then the value of the corresponding instance variable will be set to ''. As a result, the above code will still work on terminals that do not @@ -30,11 +30,11 @@ class TerminalController: Also, this means that you can test whether the terminal supports a given action by simply testing the truth value of the corresponding instance variable: - + >>> term = TerminalController() >>> if term.CLEAR_SCREEN: ... print 'This terminal supports clearning the screen.' - + Finally, if the width and height of the terminal are known, then they will be stored in the `COLS` and `LINES` attributes. """ @@ -44,35 +44,35 @@ class TerminalController: DOWN = '' #: Move the cursor down one line LEFT = '' #: Move the cursor left one char RIGHT = '' #: Move the cursor right one char - + # Deletion: CLEAR_SCREEN = '' #: Clear the screen and move to home position CLEAR_EOL = '' #: Clear to the end of the line. CLEAR_BOL = '' #: Clear to the beginning of the line. CLEAR_EOS = '' #: Clear to the end of the screen - + # Output modes: BOLD = '' #: Turn on bold mode BLINK = '' #: Turn on blink mode DIM = '' #: Turn on half-bright mode REVERSE = '' #: Turn on reverse-video mode NORMAL = '' #: Turn off all modes - + # Cursor display: HIDE_CURSOR = '' #: Make the cursor invisible SHOW_CURSOR = '' #: Make the cursor visible - + # Terminal size: COLS = None #: Width of the terminal (None for unknown) LINES = None #: Height of the terminal (None for unknown) - + # Foreground colors: BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = '' - + # Background colors: BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = '' BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = '' - + _STRING_CAPABILITIES = """ BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1 CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold @@ -80,7 +80,7 @@ class TerminalController: HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split() _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split() _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split() - + def __init__(self, term_stream=sys.stdout): """ Create a `TerminalController` and initialize its attributes @@ -92,24 +92,24 @@ class TerminalController: # Curses isn't available on all platforms try: import curses except: return - + # If the stream isn't a tty, then assume it has no capabilities. if os.environ.get('CALIBRE_WORKER', None) is not None or not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return - + # Check the terminal type. If we fail, then assume that the # terminal has no capabilities. try: curses.setupterm() except: return - + # Look up numeric capabilities. self.COLS = curses.tigetnum('cols') self.LINES = curses.tigetnum('lines') - + # Look up string capabilities. for capability in self._STRING_CAPABILITIES: (attrib, cap_name) = capability.split('=') setattr(self, attrib, self._tigetstr(cap_name) or '') - + # Colors set_fg = self._tigetstr('setf') if set_fg: @@ -127,7 +127,7 @@ class TerminalController: if set_bg_ansi: for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS): setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '') - + def _tigetstr(self, cap_name): # String capabilities can include "delays" of the form "$<2>". # For any modern terminal, we should be able to just ignore @@ -135,7 +135,7 @@ class TerminalController: import curses cap = curses.tigetstr(cap_name) or '' return re.sub(r'\$<\d+>[/*]?', '', cap) - + def render(self, template): """ Replace each $-substitutions in the given template string with @@ -143,7 +143,7 @@ class TerminalController: '' (if it's not). """ return re.sub(r'\$\$|\${\w+}', self._render_sub, template) - + def _render_sub(self, match): s = match.group() if s == '$$': return s @@ -156,20 +156,20 @@ class TerminalController: class ProgressBar: """ A 3-line progress bar, which looks like:: - + Header 20% [===========----------------------------------] progress message - + The progress bar is colored, if the terminal supports color output; and adjusts to the width of the terminal. - + If the terminal doesn't have the required capabilities, it uses a simple progress bar. """ BAR = '%3d%% ${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}\n' HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n' - + def __init__(self, term, header, no_progress_bar = False): self.term, self.no_progress_bar = term, no_progress_bar self.fancy = self.term.CLEAR_EOL and self.term.UP and self.term.BOL @@ -177,12 +177,14 @@ class ProgressBar: self.width = self.term.COLS or 75 self.bar = term.render(self.BAR) self.header = self.term.render(self.HEADER % header.center(self.width)) + if isinstance(self.header, unicode): + self.header = self.header.encode('utf-8') self.cleared = 1 #: true if we haven't drawn the bar yet. - + def update(self, percent, message=''): if isinstance(message, unicode): message = message.encode('utf-8', 'replace') - + if self.no_progress_bar: if message: print message @@ -203,8 +205,8 @@ class ProgressBar: else: print '%d%%'%(percent*100), message sys.stdout.flush() - - + + def clear(self): if self.fancy and not self.cleared: sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL + From 021149097fdea34eba3c52045bf64c32b5508146 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Apr 2009 15:35:24 -0700 Subject: [PATCH 3/5] Fix #2311 (Failure to parse an lrf file) --- src/calibre/ebooks/lrf/lrfparser.py | 32 ++++++++++++++--------------- src/calibre/ebooks/lrf/objects.py | 7 ++++++- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/lrf/lrfparser.py b/src/calibre/ebooks/lrf/lrfparser.py index d74841515c..566f0b38f1 100644 --- a/src/calibre/ebooks/lrf/lrfparser.py +++ b/src/calibre/ebooks/lrf/lrfparser.py @@ -5,16 +5,16 @@ __copyright__ = '2008, Kovid Goyal ' import sys, array, os, re, codecs, logging from calibre import setup_cli_handlers, sanitize_file_name -from calibre.utils.config import OptionParser +from calibre.utils.config import OptionParser from calibre.ebooks.lrf.meta import LRFMetaFile from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \ Font, Text, TOCObject, BookAttr, ruby_tags - + class LRFDocument(LRFMetaFile): - + class temp(object): pass - + def __init__(self, stream): LRFMetaFile.__init__(self, stream) self.scramble_key = self.xor_key @@ -23,11 +23,11 @@ class LRFDocument(LRFMetaFile): self.image_map = {} self.toc = '' self.keep_parsing = True - + def parse(self): self._parse_objects() self.metadata = LRFDocument.temp() - for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id', + for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id', 'classification', 'free_text', 'publisher', 'label', 'category'): setattr(self.metadata, a, getattr(self, a)) self.doc_info = LRFDocument.temp() @@ -37,7 +37,7 @@ class LRFDocument(LRFMetaFile): self.device_info = LRFDocument.temp() for a in ('dpi', 'width', 'height'): setattr(self.device_info, a, getattr(self, a)) - + def _parse_objects(self): self.objects = {} self._file.seek(self.object_index_offset) @@ -68,15 +68,15 @@ class LRFDocument(LRFMetaFile): attr = h[0] if hasattr(obj, attr): self.ruby_tags[attr] = getattr(obj, attr) - + def __iter__(self): for pt in self.page_trees: yield pt - + def write_files(self): for obj in self.image_map.values() + self.font_map.values(): - open(obj.file, 'wb').write(obj.stream) - + open(obj.file, 'wb').write(obj.stream) + def to_xml(self, write_files=True): bookinfo = u'\n\n\n' bookinfo += u'%s\n'%(self.metadata.title_reading, self.metadata.title) @@ -113,7 +113,7 @@ class LRFDocument(LRFMetaFile): pages += unicode(page) pages += close traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id] - + objects = u'\n\n' styles = u'\n