commit cc7eaf016a
Author: Kovid Goyal
Date:   2019-06-22 05:04:22 +05:30
GPG Key ID: 06BC317B515ACE7C
48 changed files with 463 additions and 430 deletions

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" """

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" """

View File

@ -1,99 +1,101 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import re import re
NAME_MAP = { NAME_MAP = {
u'aliceblue': u'#F0F8FF', 'aliceblue': '#F0F8FF',
u'antiquewhite': u'#FAEBD7', 'antiquewhite': '#FAEBD7',
u'aqua': u'#00FFFF', 'aqua': '#00FFFF',
u'aquamarine': u'#7FFFD4', 'aquamarine': '#7FFFD4',
u'azure': u'#F0FFFF', 'azure': '#F0FFFF',
u'beige': u'#F5F5DC', 'beige': '#F5F5DC',
u'bisque': u'#FFE4C4', 'bisque': '#FFE4C4',
u'black': u'#000000', 'black': '#000000',
u'blanchedalmond': u'#FFEBCD', 'blanchedalmond': '#FFEBCD',
u'blue': u'#0000FF', 'blue': '#0000FF',
u'brown': u'#A52A2A', 'brown': '#A52A2A',
u'burlywood': u'#DEB887', 'burlywood': '#DEB887',
u'cadetblue': u'#5F9EA0', 'cadetblue': '#5F9EA0',
u'chartreuse': u'#7FFF00', 'chartreuse': '#7FFF00',
u'chocolate': u'#D2691E', 'chocolate': '#D2691E',
u'coral': u'#FF7F50', 'coral': '#FF7F50',
u'crimson': u'#DC143C', 'crimson': '#DC143C',
u'cyan': u'#00FFFF', 'cyan': '#00FFFF',
u'darkblue': u'#00008B', 'darkblue': '#00008B',
u'darkgoldenrod': u'#B8860B', 'darkgoldenrod': '#B8860B',
u'darkgreen': u'#006400', 'darkgreen': '#006400',
u'darkkhaki': u'#BDB76B', 'darkkhaki': '#BDB76B',
u'darkmagenta': u'#8B008B', 'darkmagenta': '#8B008B',
u'darkolivegreen': u'#556B2F', 'darkolivegreen': '#556B2F',
u'darkorange': u'#FF8C00', 'darkorange': '#FF8C00',
u'darkorchid': u'#9932CC', 'darkorchid': '#9932CC',
u'darkred': u'#8B0000', 'darkred': '#8B0000',
u'darksalmon': u'#E9967A', 'darksalmon': '#E9967A',
u'darkslateblue': u'#483D8B', 'darkslateblue': '#483D8B',
u'darkslategrey': u'#2F4F4F', 'darkslategrey': '#2F4F4F',
u'darkviolet': u'#9400D3', 'darkviolet': '#9400D3',
u'deeppink': u'#FF1493', 'deeppink': '#FF1493',
u'dodgerblue': u'#1E90FF', 'dodgerblue': '#1E90FF',
u'firebrick': u'#B22222', 'firebrick': '#B22222',
u'floralwhite': u'#FFFAF0', 'floralwhite': '#FFFAF0',
u'forestgreen': u'#228B22', 'forestgreen': '#228B22',
u'fuchsia': u'#FF00FF', 'fuchsia': '#FF00FF',
u'gainsboro': u'#DCDCDC', 'gainsboro': '#DCDCDC',
u'ghostwhite': u'#F8F8FF', 'ghostwhite': '#F8F8FF',
u'gold': u'#FFD700', 'gold': '#FFD700',
u'goldenrod': u'#DAA520', 'goldenrod': '#DAA520',
u'indianred ': u'#CD5C5C', 'indianred ': '#CD5C5C',
u'indigo ': u'#4B0082', 'indigo ': '#4B0082',
u'khaki': u'#F0E68C', 'khaki': '#F0E68C',
u'lavenderblush': u'#FFF0F5', 'lavenderblush': '#FFF0F5',
u'lawngreen': u'#7CFC00', 'lawngreen': '#7CFC00',
u'lightblue': u'#ADD8E6', 'lightblue': '#ADD8E6',
u'lightcoral': u'#F08080', 'lightcoral': '#F08080',
u'lightgoldenrodyellow': u'#FAFAD2', 'lightgoldenrodyellow': '#FAFAD2',
u'lightgray': u'#D3D3D3', 'lightgray': '#D3D3D3',
u'lightgrey': u'#D3D3D3', 'lightgrey': '#D3D3D3',
u'lightskyblue': u'#87CEFA', 'lightskyblue': '#87CEFA',
u'lightslategrey': u'#778899', 'lightslategrey': '#778899',
u'lightsteelblue': u'#B0C4DE', 'lightsteelblue': '#B0C4DE',
u'lime': u'#87CEFA', 'lime': '#87CEFA',
u'linen': u'#FAF0E6', 'linen': '#FAF0E6',
u'magenta': u'#FF00FF', 'magenta': '#FF00FF',
u'maroon': u'#800000', 'maroon': '#800000',
u'mediumaquamarine': u'#66CDAA', 'mediumaquamarine': '#66CDAA',
u'mediumblue': u'#0000CD', 'mediumblue': '#0000CD',
u'mediumorchid': u'#BA55D3', 'mediumorchid': '#BA55D3',
u'mediumpurple': u'#9370D8', 'mediumpurple': '#9370D8',
u'mediumseagreen': u'#3CB371', 'mediumseagreen': '#3CB371',
u'mediumslateblue': u'#7B68EE', 'mediumslateblue': '#7B68EE',
u'midnightblue': u'#191970', 'midnightblue': '#191970',
u'moccasin': u'#FFE4B5', 'moccasin': '#FFE4B5',
u'navajowhite': u'#FFDEAD', 'navajowhite': '#FFDEAD',
u'navy': u'#000080', 'navy': '#000080',
u'oldlace': u'#FDF5E6', 'oldlace': '#FDF5E6',
u'olive': u'#808000', 'olive': '#808000',
u'orange': u'#FFA500', 'orange': '#FFA500',
u'orangered': u'#FF4500', 'orangered': '#FF4500',
u'orchid': u'#DA70D6', 'orchid': '#DA70D6',
u'paleturquoise': u'#AFEEEE', 'paleturquoise': '#AFEEEE',
u'papayawhip': u'#FFEFD5', 'papayawhip': '#FFEFD5',
u'peachpuff': u'#FFDAB9', 'peachpuff': '#FFDAB9',
u'powderblue': u'#B0E0E6', 'powderblue': '#B0E0E6',
u'rosybrown': u'#BC8F8F', 'rosybrown': '#BC8F8F',
u'royalblue': u'#4169E1', 'royalblue': '#4169E1',
u'saddlebrown': u'#8B4513', 'saddlebrown': '#8B4513',
u'sandybrown': u'#8B4513', 'sandybrown': '#8B4513',
u'seashell': u'#FFF5EE', 'seashell': '#FFF5EE',
u'sienna': u'#A0522D', 'sienna': '#A0522D',
u'silver': u'#C0C0C0', 'silver': '#C0C0C0',
u'skyblue': u'#87CEEB', 'skyblue': '#87CEEB',
u'slategrey': u'#708090', 'slategrey': '#708090',
u'snow': u'#FFFAFA', 'snow': '#FFFAFA',
u'springgreen': u'#00FF7F', 'springgreen': '#00FF7F',
u'violet': u'#EE82EE', 'violet': '#EE82EE',
u'yellowgreen': u'#9ACD32' 'yellowgreen': '#9ACD32'
} }
hex_pat = re.compile(r'#(\d{2})(\d{2})(\d{2})') hex_pat = re.compile(r'#(\d{2})(\d{2})(\d{2})')
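Most of the churn in the colour-map hunk above is mechanical: with `from __future__ import unicode_literals` in effect, bare string literals are already text (unicode) strings on Python 2, so the `u''` prefixes can simply be dropped. A standalone sketch of that behaviour (not calibre code; the two entries are just taken from the table above):

```python
from __future__ import print_function, unicode_literals

# With unicode_literals in effect, 'aliceblue' and u'aliceblue' denote the
# same text string on Python 2 and Python 3 alike, so the u prefix is noise.
NAME_MAP = {
    'aliceblue': '#F0F8FF',
    'antiquewhite': '#FAEBD7',
}

print(type(next(iter(NAME_MAP))))  # <type 'unicode'> on py2, <class 'str'> on py3
```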

View File

@ -1,4 +1,5 @@
from __future__ import print_function from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" """
@ -106,7 +107,7 @@ class HTMLConverter(object):
re.IGNORECASE), lambda m: '<br />'), re.IGNORECASE), lambda m: '<br />'),
# Replace entities # Replace entities
(re.compile(u'&(\\S+?);'), partial(entity_to_unicode, (re.compile(r'&(\S+?);'), partial(entity_to_unicode,
exceptions=['lt', 'gt', 'amp', 'quot'])), exceptions=['lt', 'gt', 'amp', 'quot'])),
# Remove comments from within style tags as they can mess up BeatifulSoup # Remove comments from within style tags as they can mess up BeatifulSoup
(re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL), (re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),
@ -151,16 +152,16 @@ class HTMLConverter(object):
(re.compile('<hr>', re.IGNORECASE), (re.compile('<hr>', re.IGNORECASE),
lambda match : '<span style="page-break-after:always"> </span>'), lambda match : '<span style="page-break-after:always"> </span>'),
# Create header tags # Create header tags
(re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE), (re.compile(r'<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))), lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile('<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE), (re.compile(r'<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))), lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), (re.compile(r'<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)), lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), (re.compile(r'<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)), lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
# Blank lines # Blank lines
(re.compile('<div[^><]*?>(&nbsp;){4}</div>', re.IGNORECASE), (re.compile(r'<div[^><]*?>(&nbsp;){4}</div>', re.IGNORECASE),
lambda match : '<p></p>'), lambda match : '<p></p>'),
] ]
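These preprocessing hunks also rewrite the remaining patterns as raw `r''` strings, so escapes such as `\S` and `\w` reach the regex engine without doubling (and without the invalid-escape warnings newer Pythons emit for non-raw literals). A small illustration using the entity-replacement pattern from the hunks above; the sample input is made up:

```python
from __future__ import print_function, unicode_literals
import re

# Raw string: the regex engine sees \S directly, no '\\S' double escaping needed.
entity_pat = re.compile(r'&(\S+?);')

print(entity_pat.findall('x &amp; y &lt; z'))  # ['amp', 'lt']
```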
@ -351,10 +352,9 @@ class HTMLConverter(object):
if not os.path.exists(tdir): if not os.path.exists(tdir):
os.makedirs(tdir) os.makedirs(tdir)
try: try:
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f:
dump.write(unicode_type(soup).encode('utf-8')) f.write(unicode_type(soup).encode('utf-8'))
self.log.info(_('Written preprocessed HTML to ')+dump.name) self.log.info(_('Written preprocessed HTML to ')+f.name)
dump.close()
except: except:
pass pass
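The debug-dump hunk above swaps an explicit `open()`/`write()`/`close()` sequence for a `with` block, which closes the file even if the write or the log call raises. A minimal sketch of the pattern, writing to a throwaway temp directory instead of calibre's real dump path:

```python
import os
import tempfile

tdir = tempfile.mkdtemp()
path = os.path.join(tdir, 'html2lrf-verbose.html')  # file name taken from the hunk

with open(path, 'wb') as f:
    f.write(u'<html/>'.encode('utf-8'))
    name = f.name
# f is guaranteed to be closed here, whether or not the block raised.
print(name)
```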
@ -374,7 +374,7 @@ class HTMLConverter(object):
if not os.path.exists(path): if not os.path.exists(path):
path = path.replace('&', '%26') # convertlit replaces & with %26 in file names path = path.replace('&', '%26') # convertlit replaces & with %26 in file names
f = open(path, 'rb') with open(path, 'rb') as f:
raw = f.read() raw = f.read()
if self.pdftohtml: # Bug in pdftohtml that causes it to output invalid UTF-8 files if self.pdftohtml: # Bug in pdftohtml that causes it to output invalid UTF-8 files
raw = raw.decode('utf-8', 'ignore') raw = raw.decode('utf-8', 'ignore')
@ -382,7 +382,6 @@ class HTMLConverter(object):
raw = raw.decode(self.encoding, 'ignore') raw = raw.decode(self.encoding, 'ignore')
else: else:
raw = xml_to_unicode(raw, self.verbose)[0] raw = xml_to_unicode(raw, self.verbose)[0]
f.close()
soup = self.preprocess(raw) soup = self.preprocess(raw)
self.log.info(_('\tConverting to BBeB...')) self.log.info(_('\tConverting to BBeB...'))
self.current_style = {} self.current_style = {}
@ -397,12 +396,12 @@ class HTMLConverter(object):
def parse_css(self, style): def parse_css(self, style):
""" """
Parse the contents of a <style> tag or .css file. Parse the contents of a <style> tag or .css file.
@param style: C{str(style)} should be the CSS to parse. @param style: C{unicode_type(style)} should be the CSS to parse.
@return: A dictionary with one entry per selector where the key is the @return: A dictionary with one entry per selector where the key is the
selector name and the value is a dictionary of properties selector name and the value is a dictionary of properties
""" """
sdict, pdict = {}, {} sdict, pdict = {}, {}
style = re.sub('/\\*.*?\\*/', '', style) # Remove /*...*/ comments style = re.sub(r'/\*.*?\*/', '', style) # Remove /*...*/ comments
for sel in re.findall(HTMLConverter.SELECTOR_PAT, style): for sel in re.findall(HTMLConverter.SELECTOR_PAT, style):
for key in sel[0].split(','): for key in sel[0].split(','):
val = self.parse_style_properties(sel[1]) val = self.parse_style_properties(sel[1])
@ -789,7 +788,7 @@ class HTMLConverter(object):
src = src.lstrip() src = src.lstrip()
f = src[0] f = src[0]
next = 1 next = 1
if f in ("'", '"', u'\u201c', u'\u2018', u'\u201d', u'\u2019'): if f in ("'", '"', '\u201c', '\u2018', '\u201d', '\u2019'):
if len(src) >= 2: if len(src) >= 2:
next = 2 next = 2
f = src[:2] f = src[:2]
@ -805,14 +804,14 @@ class HTMLConverter(object):
def append_text(src): def append_text(src):
fp, key, variant = self.font_properties(css) fp, key, variant = self.font_properties(css)
for x, y in [(u'\xad', ''), (u'\xa0', ' '), (u'\ufb00', 'ff'), (u'\ufb01', 'fi'), (u'\ufb02', 'fl'), (u'\ufb03', 'ffi'), (u'\ufb04', 'ffl')]: for x, y in [('\xad', ''), ('\xa0', ' '), ('\ufb00', 'ff'), ('\ufb01', 'fi'), ('\ufb02', 'fl'), ('\ufb03', 'ffi'), ('\ufb04', 'ffl')]:
src = src.replace(x, y) src = src.replace(x, y)
valigner = lambda x: x valigner = lambda x: x
if 'vertical-align' in css: if 'vertical-align' in css:
valign = css['vertical-align'] valign = css['vertical-align']
if valign in ('sup', 'super', 'sub'): if valign in ('sup', 'super', 'sub'):
fp['fontsize'] = int(int(fp['fontsize']) * 5 / 3.0) fp['fontsize'] = int(fp['fontsize']) * 5 // 3
valigner = Sub if valign == 'sub' else Sup valigner = Sub if valign == 'sub' else Sup
normal_font_size = int(fp['fontsize']) normal_font_size = int(fp['fontsize'])
@ -864,12 +863,12 @@ class HTMLConverter(object):
if collapse_whitespace: if collapse_whitespace:
src = re.sub(r'\s{1,}', ' ', src) src = re.sub(r'\s{1,}', ' ', src)
if self.stripped_space and len(src) == len(src.lstrip(u' \n\r\t')): if self.stripped_space and len(src) == len(src.lstrip(' \n\r\t')):
src = self.stripped_space + src src = self.stripped_space + src
src, orig = src.rstrip(u' \n\r\t'), src src, orig = src.rstrip(' \n\r\t'), src
self.stripped_space = orig[len(src):] self.stripped_space = orig[len(src):]
if len(self.previous_text) != len(self.previous_text.rstrip(u' \n\r\t')): if len(self.previous_text) != len(self.previous_text.rstrip(' \n\r\t')):
src = src.lstrip(u' \n\r\t') src = src.lstrip(' \n\r\t')
if len(src): if len(src):
self.previous_text = src self.previous_text = src
append_text(src) append_text(src)
@ -971,8 +970,8 @@ class HTMLConverter(object):
xsize=width, ysize=height) xsize=width, ysize=height)
line_height = (int(self.current_block.textStyle.attrs['baselineskip']) + line_height = (int(self.current_block.textStyle.attrs['baselineskip']) +
int(self.current_block.textStyle.attrs['linespace']))//10 int(self.current_block.textStyle.attrs['linespace']))//10
line_height *= self.profile.dpi/72. line_height *= self.profile.dpi/72
lines = int(ceil(float(height)/line_height)) lines = int(ceil(height/line_height))
dc = DropCaps(lines) dc = DropCaps(lines)
dc.append(Plot(im, xsize=ceil(width*factor), ysize=ceil(height*factor))) dc.append(Plot(im, xsize=ceil(width*factor), ysize=ceil(height*factor)))
self.current_para.append(dc) self.current_para.append(dc)
@ -1011,10 +1010,10 @@ class HTMLConverter(object):
self.process_alignment(tag_css) self.process_alignment(tag_css)
if max(width, height) <= min(pwidth, pheight)/5.: if max(width, height) <= min(pwidth, pheight)/5:
self.current_para.append(Plot(im, xsize=ceil(width*factor), self.current_para.append(Plot(im, xsize=ceil(width*factor),
ysize=ceil(height*factor))) ysize=ceil(height*factor)))
elif height <= int(floor((2/3.)*pheight)): elif height <= int(floor((2/3)*pheight)):
pb = self.current_block pb = self.current_block
self.end_current_para() self.end_current_para()
self.process_alignment(tag_css) self.process_alignment(tag_css)
@ -1032,7 +1031,7 @@ class HTMLConverter(object):
self.current_page.contents[0:1] = [] self.current_page.contents[0:1] = []
self.current_page.append(Canvas(width=pwidth, self.current_page.append(Canvas(width=pwidth,
height=height)) height=height))
left = int(floor((pwidth - width)/2.)) left = int(floor((pwidth - width)/2))
self.current_page.contents[-1].put_object( self.current_page.contents[-1].put_object(
ImageBlock(self.images[path], xsize=width, ImageBlock(self.images[path], xsize=width,
ysize=height, x1=width, y1=height, ysize=height, x1=width, y1=height,
@ -1083,7 +1082,7 @@ class HTMLConverter(object):
s1, s2 = get('margin'), get('padding') s1, s2 = get('margin'), get('padding')
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px' bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
def set(default, one, two): def set(default, one, two):
fval = None fval = None
@ -1113,7 +1112,7 @@ class HTMLConverter(object):
val /= 2. val /= 2.
ans['sidemargin'] = int(val) ans['sidemargin'] = int(val)
if 2*int(ans['sidemargin']) >= factor*int(self.current_block.blockStyle.attrs['blockwidth']): if 2*int(ans['sidemargin']) >= factor*int(self.current_block.blockStyle.attrs['blockwidth']):
ans['sidemargin'] = int((factor*int(self.current_block.blockStyle.attrs['blockwidth']))/2.) ans['sidemargin'] = (factor*int(self.current_block.blockStyle.attrs['blockwidth'])) // 2
for prop in ('topskip', 'footskip', 'sidemargin'): for prop in ('topskip', 'footskip', 'sidemargin'):
if isinstance(ans[prop], string_or_bytes): if isinstance(ans[prop], string_or_bytes):
@ -1212,7 +1211,7 @@ class HTMLConverter(object):
ans = 120 ans = 120
if ans is not None: if ans is not None:
ans += int(self.font_delta * 20) ans += int(self.font_delta * 20)
ans = str(ans) ans = unicode_type(ans)
return ans return ans
family, weight, style, variant = 'serif', 'normal', 'normal', None family, weight, style, variant = 'serif', 'normal', 'normal', None
@ -1268,7 +1267,7 @@ class HTMLConverter(object):
fs = int(t['fontsize']) fs = int(t['fontsize'])
if fs > 120: if fs > 120:
t['wordspace'] = int(fs/4.) t['wordspace'] = fs // 4
t['baselineskip'] = fs + 20 t['baselineskip'] = fs + 20
return t, key, variant return t, key, variant
@ -1290,27 +1289,27 @@ class HTMLConverter(object):
unit = float(m.group(1)) unit = float(m.group(1))
if m.group(2) == '%': if m.group(2) == '%':
normal = self.unit_convert(base_length) normal = self.unit_convert(base_length)
result = (unit/100.0) * normal result = (unit/100) * normal
elif m.group(2) == 'px': elif m.group(2) == 'px':
result = unit result = unit
elif m.group(2) == 'in': elif m.group(2) == 'in':
result = unit * dpi result = unit * dpi
elif m.group(2) == 'pt': elif m.group(2) == 'pt':
result = unit * dpi/72. result = unit * dpi/72
elif m.group(2) == 'dpt': elif m.group(2) == 'dpt':
result = unit * dpi/720. result = unit * dpi/720
elif m.group(2) == 'em': elif m.group(2) == 'em':
normal = self.unit_convert(base_length) normal = self.unit_convert(base_length)
result = unit * normal result = unit * normal
elif m.group(2) == 'pc': elif m.group(2) == 'pc':
result = unit * (dpi/72.) * 12 result = unit * (dpi/72) * 12
elif m.group(2) == 'mm': elif m.group(2) == 'mm':
result = unit * 0.04 * (dpi) result = unit * 0.04 * (dpi)
elif m.group(2) == 'cm': elif m.group(2) == 'cm':
result = unit * 0.4 * (dpi) result = unit * 0.4 * (dpi)
if result is not None: if result is not None:
if pts: if pts:
result = int(round(result * (720./dpi))) result = int(round(result * (720/dpi)))
else: else:
result = int(round(result)) result = int(round(result))
return result return result
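The trailing dots vanish from these conversions because the `from __future__ import division` added at the top of the file makes `/` true division on Python 2 as well, so `dpi/72.` and `dpi/72` are equivalent; where an integer result is wanted the code now spells it `//`. A quick standalone check using the 166 dpi figure that appears elsewhere in this commit:

```python
from __future__ import division, print_function

dpi = 166  # LRF device dpi used elsewhere in these files

print(dpi / 72)                      # true division: ~2.306 even with int operands
print(dpi // 72)                     # explicit floor division: 2
print(int(round(45 * (720 / dpi))))  # the rounding used by the pts branch above
```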
@ -1318,7 +1317,7 @@ class HTMLConverter(object):
def text_properties(self, tag_css): def text_properties(self, tag_css):
indent = self.book.defaultTextStyle.attrs['parindent'] indent = self.book.defaultTextStyle.attrs['parindent']
if 'text-indent' in tag_css: if 'text-indent' in tag_css:
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px' bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
if 'em' in tag_css['text-indent']: if 'em' in tag_css['text-indent']:
bl = '10pt' bl = '10pt'
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl) indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
@ -1349,12 +1348,12 @@ class HTMLConverter(object):
''' Ensure padding and text-indent properties are respected ''' ''' Ensure padding and text-indent properties are respected '''
text_properties = self.text_properties(tag_css) text_properties = self.text_properties(tag_css)
block_properties = self.block_properties(tag_css) block_properties = self.block_properties(tag_css)
indent = (float(text_properties['parindent'])/10) * (self.profile.dpi/72.) indent = (float(text_properties['parindent'])//10) * (self.profile.dpi/72)
margin = float(block_properties['sidemargin']) margin = float(block_properties['sidemargin'])
# Since we're flattening the block structure, we need to ensure that text # Since we're flattening the block structure, we need to ensure that text
# doesn't go off the left edge of the screen # doesn't go off the left edge of the screen
if indent < 0 and margin + indent < 0: if indent < 0 and margin + indent < 0:
text_properties['parindent'] = int(-margin * (72./self.profile.dpi) * 10) text_properties['parindent'] = int(-margin * (72/self.profile.dpi) * 10)
align = self.get_alignment(tag_css) align = self.get_alignment(tag_css)
@ -1515,7 +1514,7 @@ class HTMLConverter(object):
elif not urlparse(tag['src'])[0]: elif not urlparse(tag['src'])[0]:
self.log.warn('Could not find image: '+tag['src']) self.log.warn('Could not find image: '+tag['src'])
else: else:
self.log.debug("Failed to process: %s"%str(tag)) self.log.debug("Failed to process: %s"%unicode_type(tag))
elif tagname in ['style', 'link']: elif tagname in ['style', 'link']:
ncss, npcss = {}, {} ncss, npcss = {}, {}
if tagname == 'style': if tagname == 'style':
@ -1609,7 +1608,7 @@ class HTMLConverter(object):
in_ol = parent.name.lower() == 'ol' in_ol = parent.name.lower() == 'ol'
break break
parent = parent.parent parent = parent.parent
prepend = str(self.list_counter)+'. ' if in_ol else u'\u2022' + ' ' prepend = unicode_type(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
self.current_para.append(Span(prepend)) self.current_para.append(Span(prepend))
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)
if in_ol: if in_ol:
@ -1652,7 +1651,7 @@ class HTMLConverter(object):
if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])): if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])):
if not tag.has_attr('id'): if not tag.has_attr('id'):
tag['id'] = __appname__+'_id_'+str(self.id_counter) tag['id'] = __appname__+'_id_'+unicode_type(self.id_counter)
self.id_counter += 1 self.id_counter += 1
tkey = self.target_prefix+tag['id'] tkey = self.target_prefix+tag['id']
@ -1781,7 +1780,7 @@ class HTMLConverter(object):
else: else:
if xpos > 65535: if xpos > 65535:
xpos = 65535 xpos = 65535
canvases[-1].put_object(block, xpos + int(delta/2.), ypos) canvases[-1].put_object(block, xpos + delta//2, ypos)
for canvas in canvases: for canvas in canvases:
self.current_page.append(canvas) self.current_page.append(canvas)
@ -1819,7 +1818,7 @@ def process_file(path, options, logger):
options.profile.screen_height - options.profile.fudge options.profile.screen_height - options.profile.fudge
width, height = im.size width, height = im.size
if width < pwidth: if width < pwidth:
corrf = float(pwidth)/width corrf = pwidth/width
width, height = pwidth, int(corrf*height) width, height = pwidth, int(corrf*height)
scaled, width, height = fit_image(width, height, pwidth, pheight) scaled, width, height = fit_image(width, height, pwidth, pheight)
@ -1934,7 +1933,8 @@ def try_opf(path, options, logger):
dirpath = os.path.dirname(os.path.abspath(opf)) dirpath = os.path.dirname(os.path.abspath(opf))
from calibre.ebooks.metadata.opf2 import OPF as OPF2 from calibre.ebooks.metadata.opf2 import OPF as OPF2
opf = OPF2(open(opf, 'rb'), dirpath) with open(opf, 'rb') as f:
opf = OPF2(f, dirpath)
try: try:
title = opf.title title = opf.title
if title and not getattr(options, 'title', None): if title and not getattr(options, 'title', None):

View File

@ -1,4 +1,5 @@
from __future__ import print_function from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import math, sys, re, numbers import math, sys, re, numbers
@ -125,7 +126,7 @@ class Cell(object):
def pts_to_pixels(self, pts): def pts_to_pixels(self, pts):
pts = int(pts) pts = int(pts)
return ceil((float(self.conv.profile.dpi)/72.)*(pts/10.)) return ceil((float(self.conv.profile.dpi)/72)*(pts/10))
def minimum_width(self): def minimum_width(self):
return max([self.minimum_tb_width(tb) for tb in self.text_blocks]) return max([self.minimum_tb_width(tb) for tb in self.text_blocks])
@ -328,15 +329,15 @@ class Table(object):
min_widths = [self.minimum_width(i)+10 for i in range(cols)] min_widths = [self.minimum_width(i)+10 for i in range(cols)]
for i in range(len(widths)): for i in range(len(widths)):
wp = self.width_percent(i) wp = self.width_percent(i)
if wp >= 0.: if wp >= 0:
widths[i] = max(min_widths[i], ceil((wp/100.) * (maxwidth - (cols-1)*self.colpad))) widths[i] = max(min_widths[i], ceil((wp/100) * (maxwidth - (cols-1)*self.colpad)))
itercount = 0 itercount = 0
while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100: while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100:
for i in range(cols): for i in range(cols):
widths[i] = ceil((95./100.)*widths[i]) if \ widths[i] = ceil((95/100)*widths[i]) if \
ceil((95./100.)*widths[i]) >= min_widths[i] else widths[i] ceil((95/100)*widths[i]) >= min_widths[i] else widths[i]
itercount += 1 itercount += 1
return [i+self.colpad for i in widths] return [i+self.colpad for i in widths]

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
@ -73,7 +75,7 @@ def render_table(soup, table, css, base_dir, width, height, dpi, factor=1.0):
style = '' style = ''
for key, val in css.items(): for key, val in css.items():
style += key + ':%s;'%val style += key + ':%s;'%val
html = u'''\ html = '''\
<html> <html>
<head> <head>
%s %s

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
@ -12,7 +12,7 @@ from copy import deepcopy, copy
from lxml import etree from lxml import etree
from calibre import guess_type from calibre import guess_type
from polyglot.builtins import as_bytes, map from polyglot.builtins import as_bytes, map, unicode_type
class Canvas(etree.XSLTExtension): class Canvas(etree.XSLTExtension):
@ -70,9 +70,9 @@ class Canvas(etree.XSLTExtension):
height = self.styles.to_num(block.get("ysize", None)) height = self.styles.to_num(block.get("ysize", None))
img = div.makeelement('img') img = div.makeelement('img')
if width is not None: if width is not None:
img.set('width', str(int(width))) img.set('width', unicode_type(int(width)))
if height is not None: if height is not None:
img.set('height', str(int(height))) img.set('height', unicode_type(int(height)))
ref = block.get('refstream', None) ref = block.get('refstream', None)
if ref is not None: if ref is not None:
imstr = self.doc.xpath('//ImageStream[@objid="%s"]'%ref) imstr = self.doc.xpath('//ImageStream[@objid="%s"]'%ref)
@ -263,13 +263,13 @@ class TextBlock(etree.XSLTExtension):
a.set('href', self.char_button_map[oid]) a.set('href', self.char_button_map[oid])
self.process_container(child, a) self.process_container(child, a)
elif child.tag == 'Plot': elif child.tag == 'Plot':
xsize = self.styles.to_num(child.get('xsize', None), 166./720) xsize = self.styles.to_num(child.get('xsize', None), 166/720)
ysize = self.styles.to_num(child.get('ysize', None), 166./720) ysize = self.styles.to_num(child.get('ysize', None), 166/720)
img = self.root.makeelement('img') img = self.root.makeelement('img')
if xsize is not None: if xsize is not None:
img.set('width', str(int(xsize))) img.set('width', unicode_type(int(xsize)))
if ysize is not None: if ysize is not None:
img.set('height', str(int(ysize))) img.set('height', unicode_type(int(ysize)))
ro = child.get('refobj', None) ro = child.get('refobj', None)
if ro in self.plot_map: if ro in self.plot_map:
img.set('src', self.plot_map[ro]) img.set('src', self.plot_map[ro])
@ -320,8 +320,7 @@ class Styles(etree.XSLTExtension):
def px_to_pt(self, px): def px_to_pt(self, px):
try: try:
px = float(px) return px * 72/166
return px * 72./166.
except: except:
return None return None

View File

@ -1,6 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''''' ''''''

View File

@ -1,4 +1,5 @@
from __future__ import print_function from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
@ -21,7 +22,7 @@ from calibre.ebooks.lrf.pylrs.pylrs import (
TextStyle TextStyle
) )
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from polyglot.builtins import string_or_bytes from polyglot.builtins import string_or_bytes, unicode_type
class LrsParser(object): class LrsParser(object):
@ -54,7 +55,7 @@ class LrsParser(object):
for key, val in tag.attrs: for key, val in tag.attrs:
if key in exclude: if key in exclude:
continue continue
result[str(key)] = val result[unicode_type(key)] = val
return result return result
def text_tag_to_element(self, tag): def text_tag_to_element(self, tag):
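Throughout these files `str(...)` becomes `unicode_type(...)` from `polyglot.builtins`. The sketch below is a minimal stand-in showing what such a compatibility shim typically provides; it is an assumption about that module's behaviour, not a copy of calibre's implementation:

```python
from __future__ import print_function
import sys

# Hypothetical stand-in for polyglot.builtins: the real module is richer,
# but the core idea is mapping the text type per Python version.
is_py3 = sys.version_info.major >= 3

if is_py3:
    unicode_type = str
    string_or_bytes = (str, bytes)
else:
    unicode_type = unicode            # noqa: F821  (only defined on Python 2)
    string_or_bytes = (str, unicode)  # noqa: F821

# str(x) is a byte string on Python 2 but text on Python 3; unicode_type(x)
# is text on both, which is why the diff replaces str(...) with it.
print(unicode_type(42), type(unicode_type(42)))
```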

View File

@ -5,10 +5,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import struct, array, zlib, io, collections, re import struct, array, zlib, io, collections, re
from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
from calibre.constants import ispy3
from calibre import entity_to_unicode, prepare_string_for_xml from calibre import entity_to_unicode, prepare_string_for_xml
from calibre.ebooks.lrf.tags import Tag from calibre.ebooks.lrf.tags import Tag
from polyglot.builtins import unicode_type, string_or_bytes from polyglot.builtins import is_py3, unicode_type
ruby_tags = { ruby_tags = {
0xF575: ['rubyAlignAndAdjust', 'W'], 0xF575: ['rubyAlignAndAdjust', 'W'],
@ -84,7 +83,7 @@ class LRFObject(object):
if h[1] != '' and h[0] != '': if h[1] != '' and h[0] != '':
setattr(self, h[0], val) setattr(self, h[0], val)
else: else:
raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, str(tag))) raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, unicode_type(tag)))
def __iter__(self): def __iter__(self):
for i in range(0): for i in range(0):
@ -98,8 +97,8 @@ class LRFContentObject(LRFObject):
tag_map = {} tag_map = {}
def __init__(self, bytes, objects): def __init__(self, byts, objects):
self.stream = bytes if hasattr(bytes, 'read') else io.BytesIO(bytes) self.stream = byts if hasattr(byts, 'read') else io.BytesIO(byts)
length = self.stream_size() length = self.stream_size()
self.objects = objects self.objects = objects
self._contents = [] self._contents = []
@ -122,7 +121,7 @@ class LRFContentObject(LRFObject):
def handle_tag(self, tag): def handle_tag(self, tag):
if tag.id in self.tag_map: if tag.id in self.tag_map:
action = self.tag_map[tag.id] action = self.tag_map[tag.id]
if isinstance(action, string_or_bytes): if isinstance(action, unicode_type):
func, args = action, () func, args = action, ()
else: else:
func, args = action[0], (action[1],) func, args = action[0], (action[1],)
@ -209,7 +208,7 @@ class StyleObject(object):
s += '/>\n' s += '/>\n'
return s return s
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
def as_dict(self): def as_dict(self):
@ -257,7 +256,7 @@ class Color(object):
def __str__(self): def __str__(self):
return '0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) return '0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
def __len__(self): def __len__(self):
@ -290,7 +289,7 @@ class PageDiv(EmptyPageElement):
return '\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\ return '\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\
(self.pain, self.spacesize, self.linewidth, self.color) (self.pain, self.spacesize, self.linewidth, self.color)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -308,7 +307,7 @@ class RuledLine(EmptyPageElement):
return '\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\ return '\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\
(self.linelength, self.linetype, self.linewidth, self.linecolor) (self.linelength, self.linetype, self.linewidth, self.linecolor)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -320,13 +319,13 @@ class Wait(EmptyPageElement):
def __str__(self): def __str__(self):
return '\n<Wait time="%d" />\n'%(self.time) return '\n<Wait time="%d" />\n'%(self.time)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
class Locate(EmptyPageElement): class Locate(EmptyPageElement):
pos_map = {1:'bottomleft', 2:'bottomright',3:'topright',4:'topleft', 5:'base'} pos_map = {1:'bottomleft', 2:'bottomright', 3:'topright', 4:'topleft', 5:'base'}
def __init__(self, pos): def __init__(self, pos):
self.pos = self.pos_map[pos] self.pos = self.pos_map[pos]
@ -334,7 +333,7 @@ class Locate(EmptyPageElement):
def __str__(self): def __str__(self):
return '\n<Locate pos="%s" />\n'%(self.pos) return '\n<Locate pos="%s" />\n'%(self.pos)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -347,7 +346,7 @@ class BlockSpace(EmptyPageElement):
return '\n<BlockSpace xspace="%d" yspace="%d" />\n'%\ return '\n<BlockSpace xspace="%d" yspace="%d" />\n'%\
(self.xspace, self.yspace) (self.xspace, self.yspace)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -356,7 +355,7 @@ class Page(LRFStream):
0xF503: ['style_id', 'D'], 0xF503: ['style_id', 'D'],
0xF50B: ['obj_list', 'P'], 0xF50B: ['obj_list', 'P'],
0xF571: ['', ''], 0xF571: ['', ''],
0xF57C: ['parent_page_tree','D'], 0xF57C: ['parent_page_tree', 'D'],
} }
tag_map.update(PageAttr.tag_map) tag_map.update(PageAttr.tag_map)
tag_map.update(LRFStream.tag_map) tag_map.update(LRFStream.tag_map)
@ -378,9 +377,9 @@ class Page(LRFStream):
0xF5D6: 'sound_stop', 0xF5D6: 'sound_stop',
} }
def __init__(self, bytes, objects): def __init__(self, byts, objects):
self.in_blockspace = False self.in_blockspace = False
LRFContentObject.__init__(self, bytes, objects) LRFContentObject.__init__(self, byts, objects)
def link(self, tag): def link(self, tag):
self.close_blockspace() self.close_blockspace()
@ -451,7 +450,7 @@ class Page(LRFStream):
s += '\n</Page>\n' s += '\n</Page>\n'
return s return s
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
def to_html(self): def to_html(self):
@ -525,7 +524,7 @@ class TextCSS(object):
fs = getattr(obj, 'fontsize', None) fs = getattr(obj, 'fontsize', None)
if fs is not None: if fs is not None:
ans += item('font-size: %fpt;'%(int(fs)/10.)) ans += item('font-size: %fpt;'%(int(fs)/10))
fw = getattr(obj, 'fontweight', None) fw = getattr(obj, 'fontweight', None)
if fw is not None: if fw is not None:
ans += item('font-weight: %s;'%('bold' if int(fw) >= 700 else 'normal')) ans += item('font-weight: %s;'%('bold' if int(fw) >= 700 else 'normal'))
@ -547,10 +546,10 @@ class TextCSS(object):
ans += item('text-align: %s;'%al) ans += item('text-align: %s;'%al)
lh = getattr(obj, 'linespace', None) lh = getattr(obj, 'linespace', None)
if lh is not None: if lh is not None:
ans += item('text-align: %fpt;'%(int(lh)/10.)) ans += item('text-align: %fpt;'%(int(lh)/10))
pi = getattr(obj, 'parindent', None) pi = getattr(obj, 'parindent', None)
if pi is not None: if pi is not None:
ans += item('text-indent: %fpt;'%(int(pi)/10.)) ans += item('text-indent: %fpt;'%(int(pi)/10))
return ans return ans
@ -642,7 +641,7 @@ class Block(LRFStream, TextCSS):
return s return s
return s.rstrip() + ' />\n' return s.rstrip() + ' />\n'
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
def to_html(self): def to_html(self):
@ -723,7 +722,7 @@ class Text(LRFStream):
s += '%s="%s" '%(name, val) s += '%s="%s" '%(name, val)
return s.rstrip() + (' />' if self.self_closing else '>') return s.rstrip() + (' />' if self.self_closing else '>')
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
def to_html(self): def to_html(self):
@ -881,7 +880,7 @@ class Text(LRFStream):
self.add_text(stream.read(tag.word)) self.add_text(stream.read(tag.word))
elif tag.id in self.__class__.text_tags: # A Text tag elif tag.id in self.__class__.text_tags: # A Text tag
action = self.__class__.text_tags[tag.id] action = self.__class__.text_tags[tag.id]
if isinstance(action, string_or_bytes): if isinstance(action, unicode_type):
getattr(self, action)(tag, stream) getattr(self, action)(tag, stream)
else: else:
getattr(self, action[0])(tag, action[1]) getattr(self, action[0])(tag, action[1])
@ -905,7 +904,7 @@ class Text(LRFStream):
s = '' s = ''
open_containers = collections.deque() open_containers = collections.deque()
for c in self.content: for c in self.content:
if isinstance(c, string_or_bytes): if isinstance(c, unicode_type):
s += prepare_string_for_xml(c).replace('\0', '') s += prepare_string_for_xml(c).replace('\0', '')
elif c is None: elif c is None:
if open_containers: if open_containers:
@ -923,7 +922,7 @@ class Text(LRFStream):
raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],))
return s return s
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
def to_html(self): def to_html(self):
@ -931,7 +930,7 @@ class Text(LRFStream):
open_containers = collections.deque() open_containers = collections.deque()
in_p = False in_p = False
for c in self.content: for c in self.content:
if isinstance(c, string_or_bytes): if isinstance(c, unicode_type):
s += c s += c
elif c is None: elif c is None:
if c.name == 'P': if c.name == 'P':
@ -974,7 +973,7 @@ class Image(LRFObject):
return '<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\ return '<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
(self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -987,7 +986,7 @@ class PutObj(EmptyPageElement):
def __str__(self): def __str__(self):
return '<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj) return '<PutObj x1="%d" y1="%d" refobj="%d" />'%(self.x1, self.y1, self.refobj)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -1038,7 +1037,7 @@ class Canvas(LRFStream):
s += '</%s>\n'%(self.__class__.__name__,) s += '</%s>\n'%(self.__class__.__name__,)
return s return s
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
def __iter__(self): def __iter__(self):
@ -1078,7 +1077,7 @@ class ImageStream(LRFStream):
return '<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\ return '<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\
(self.id, self.encoding, self.file) (self.id, self.encoding, self.file)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -1170,7 +1169,7 @@ class Button(LRFObject):
s += '</Button>\n' s += '</Button>\n'
return s return s
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
refpage = property(fget=lambda self : self.jump_action(2)[0]) refpage = property(fget=lambda self : self.jump_action(2)[0])
@ -1244,7 +1243,7 @@ class BookAttr(StyleObject, LRFObject):
s += '</BookStyle>\n' s += '</BookStyle>\n'
return s return s
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -1260,7 +1259,7 @@ class TocLabel(object):
def __str__(self): def __str__(self):
return '<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label) return '<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label)
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
@ -1290,7 +1289,7 @@ class TOCObject(LRFStream):
s += unicode_type(i) s += unicode_type(i)
return s + '</TOC>\n' return s + '</TOC>\n'
if not ispy3: if not is_py3:
__unicode__ = __str__ __unicode__ = __str__
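The many `if not is_py3: __unicode__ = __str__` lines above share one text-returning `__str__` between both Python versions: on Python 2 the same method is reused as `__unicode__` so that `unicode(obj)` keeps working. A self-contained sketch of the pattern, modelled on the `Wait` element in this file:

```python
import sys

is_py3 = sys.version_info.major >= 3


class Wait(object):
    def __init__(self, time):
        self.time = time

    def __str__(self):
        return '\n<Wait time="%d" />\n' % self.time

    if not is_py3:
        # On Python 2, unicode(obj) should produce the same markup as str(obj).
        __unicode__ = __str__


print(Wait(5))
```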

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
""" """
This package contains code to generate ebooks in the SONY LRS/F format. It was This package contains code to generate ebooks in the SONY LRS/F format. It was
originally developed by Mike Higgins and has been extended and modified by Kovid originally developed by Mike Higgins and has been extended and modified by Kovid

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
""" elements.py -- replacements and helpers for ElementTree """ """ elements.py -- replacements and helpers for ElementTree """
from polyglot.builtins import unicode_type, string_or_bytes from polyglot.builtins import unicode_type, string_or_bytes
@ -14,7 +16,7 @@ class ElementWriter(object):
self.outputEncodingName = outputEncodingName self.outputEncodingName = outputEncodingName
def _encodeCdata(self, rawText): def _encodeCdata(self, rawText):
if type(rawText) is str: if isinstance(rawText, bytes):
rawText = rawText.decode(self.sourceEncoding) rawText = rawText.decode(self.sourceEncoding)
text = rawText.replace("&", "&amp;") text = rawText.replace("&", "&amp;")
@ -23,20 +25,20 @@ class ElementWriter(object):
return text return text
def _writeAttribute(self, f, name, value): def _writeAttribute(self, f, name, value):
f.write(u' %s="' % unicode_type(name)) f.write(' %s="' % unicode_type(name))
if not isinstance(value, string_or_bytes): if not isinstance(value, string_or_bytes):
value = unicode_type(value) value = unicode_type(value)
value = self._encodeCdata(value) value = self._encodeCdata(value)
value = value.replace('"', '&quot;') value = value.replace('"', '&quot;')
f.write(value) f.write(value)
f.write(u'"') f.write('"')
def _writeText(self, f, rawText): def _writeText(self, f, rawText):
text = self._encodeCdata(rawText) text = self._encodeCdata(rawText)
f.write(text) f.write(text)
def _write(self, f, e): def _write(self, f, e):
f.write(u'<' + unicode_type(e.tag)) f.write('<' + unicode_type(e.tag))
attributes = e.items() attributes = e.items()
attributes.sort() attributes.sort()
@ -44,7 +46,7 @@ class ElementWriter(object):
self._writeAttribute(f, name, value) self._writeAttribute(f, name, value)
if e.text is not None or len(e) > 0: if e.text is not None or len(e) > 0:
f.write(u'>') f.write('>')
if e.text: if e.text:
self._writeText(f, e.text) self._writeText(f, e.text)
@ -52,11 +54,11 @@ class ElementWriter(object):
for e2 in e: for e2 in e:
self._write(f, e2) self._write(f, e2)
f.write(u'</%s>' % e.tag) f.write('</%s>' % e.tag)
else: else:
if self.spaceBeforeClose: if self.spaceBeforeClose:
f.write(' ') f.write(' ')
f.write(u'/>') f.write('/>')
if e.tail is not None: if e.tail is not None:
self._writeText(f, e.tail) self._writeText(f, e.tail)
@ -67,10 +69,10 @@ class ElementWriter(object):
buffer = [] buffer = []
x.write = buffer.append x.write = buffer.append
self.write(x) self.write(x)
return u''.join(buffer) return ''.join(buffer)
def write(self, f): def write(self, f):
if self.header: if self.header:
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName) f.write('<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
self._write(f, self.e) self._write(f, self.e)
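In `_encodeCdata` the guard changes from `type(rawText) is str` to `isinstance(rawText, bytes)`: on Python 2 `str` is the byte type, but on Python 3 it is text, so testing for `str` would no longer identify data that still needs decoding. A standalone sketch of the same decode-if-bytes idiom (hypothetical helper name, not calibre's):

```python
def ensure_text(raw, encoding='utf-8'):
    # Decode only when we were handed raw bytes; text passes through untouched.
    if isinstance(raw, bytes):
        raw = raw.decode(encoding)
    return raw


print(ensure_text(b'caf\xc3\xa9'))   # 'café'
print(ensure_text(u'already text'))
```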

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
""" """
pylrf.py -- very low level interface to create lrf files. See pylrs for pylrf.py -- very low level interface to create lrf files. See pylrs for
higher level interface that can use this module to render books to lrf. higher level interface that can use this module to render books to lrf.
@ -10,7 +12,7 @@ import codecs
import os import os
from .pylrfopt import tagListOptimizer from .pylrfopt import tagListOptimizer
from polyglot.builtins import iteritems, string_or_bytes from polyglot.builtins import iteritems, string_or_bytes, unicode_type
PYLRF_VERSION = "1.0" PYLRF_VERSION = "1.0"
@ -83,7 +85,7 @@ def writeWord(f, word):
if int(word) > 65535: if int(word) > 65535:
raise LrfError('Cannot encode a number greater than 65535 in a word.') raise LrfError('Cannot encode a number greater than 65535 in a word.')
if int(word) < 0: if int(word) < 0:
raise LrfError('Cannot encode a number < 0 in a word: '+str(word)) raise LrfError('Cannot encode a number < 0 in a word: '+unicode_type(word))
f.write(struct.pack("<H", int(word))) f.write(struct.pack("<H", int(word)))
@ -490,9 +492,8 @@ class LrfFileStream(LrfStreamBase):
def __init__(self, streamFlags, filename): def __init__(self, streamFlags, filename):
LrfStreamBase.__init__(self, streamFlags) LrfStreamBase.__init__(self, streamFlags)
f = open(filename, "rb") with open(filename, "rb") as f:
self.streamData = f.read() self.streamData = f.read()
f.close()
class LrfObject(object): class LrfObject(object):
@ -510,7 +511,7 @@ class LrfObject(object):
raise LrfError("object name %s not recognized" % name) raise LrfError("object name %s not recognized" % name)
def __str__(self): def __str__(self):
return 'LRFObject: ' + self.name + ", " + str(self.objId) return 'LRFObject: ' + self.name + ", " + unicode_type(self.objId)
def appendLrfTag(self, tag): def appendLrfTag(self, tag):
self.tags.append(tag) self.tags.append(tag)
@ -686,9 +687,8 @@ class LrfWriter(object):
self.tocObjId = obj.objId self.tocObjId = obj.objId
def setThumbnailFile(self, filename, encoding=None): def setThumbnailFile(self, filename, encoding=None):
f = open(filename, "rb") with open(filename, "rb") as f:
self.thumbnailData = f.read() self.thumbnailData = f.read()
f.close()
if encoding is None: if encoding is None:
encoding = os.path.splitext(filename)[1][1:] encoding = os.path.splitext(filename)[1][1:]

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
def _optimize(tagList, tagName, conversion): def _optimize(tagList, tagName, conversion):
# copy the tag of interest plus any text # copy the tag of interest plus any text
newTagList = [] newTagList = []
@ -39,5 +41,3 @@ def tagListOptimizer(tagList):
_optimize(tagList, "fontsize", int) _optimize(tagList, "fontsize", int)
_optimize(tagList, "fontweight", int) _optimize(tagList, "fontweight", int)
return oldSize - len(tagList) return oldSize - len(tagList)

View File

@ -1,4 +1,5 @@
from __future__ import print_function from __future__ import absolute_import, division, print_function, unicode_literals
# Copyright (c) 2007 Mike Higgins (Falstaff) # Copyright (c) 2007 Mike Higgins (Falstaff)
# Modifications from the original: # Modifications from the original:
# Copyright (C) 2007 Kovid Goyal <kovid@kovidgoyal.net> # Copyright (C) 2007 Kovid Goyal <kovid@kovidgoyal.net>
@ -108,10 +109,9 @@ def appendTextElements(e, contentsList, se):
""" A helper function to convert text streams into the proper elements. """ """ A helper function to convert text streams into the proper elements. """
def uconcat(text, newText, se): def uconcat(text, newText, se):
if type(newText) != type(text): if isinstance(text, bytes):
if type(text) is str:
text = text.decode(se) text = text.decode(se)
else: if isinstance(newText, bytes):
newText = newText.decode(se) newText = newText.decode(se)
return text + newText return text + newText
@ -228,8 +228,8 @@ class LrsAttributes(object):
if name not in self.attrs and name not in alsoAllow: if name not in self.attrs and name not in alsoAllow:
raise LrsError("%s does not support setting %s" % raise LrsError("%s does not support setting %s" %
(self.__class__.__name__, name)) (self.__class__.__name__, name))
if type(value) is int: if isinstance(value, int):
value = str(value) value = unicode_type(value)
self.attrs[name] = value self.attrs[name] = value
@ -333,13 +333,13 @@ class LrsObject(object):
def lrsObjectElement(self, name, objlabel="objlabel", labelName=None, def lrsObjectElement(self, name, objlabel="objlabel", labelName=None,
labelDecorate=True, **settings): labelDecorate=True, **settings):
element = Element(name) element = Element(name)
element.attrib["objid"] = str(self.objId) element.attrib["objid"] = unicode_type(self.objId)
if labelName is None: if labelName is None:
labelName = name labelName = name
if labelDecorate: if labelDecorate:
label = "%s.%d" % (labelName, self.objId) label = "%s.%d" % (labelName, self.objId)
else: else:
label = str(self.objId) label = unicode_type(self.objId)
element.attrib[objlabel] = label element.attrib[objlabel] = label
element.attrib.update(settings) element.attrib.update(settings)
return element return element
@ -565,7 +565,7 @@ class Book(Delegator):
factor = base_font_size / old_base_font_size factor = base_font_size / old_base_font_size
def rescale(old): def rescale(old):
return str(int(int(old) * factor)) return unicode_type(int(int(old) * factor))
text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock))) text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock)))
for tb in text_blocks: for tb in text_blocks:
@ -696,7 +696,7 @@ class TableOfContents(object):
def addTocEntry(self, tocLabel, textBlock): def addTocEntry(self, tocLabel, textBlock):
if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)): if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)):
raise LrsError("TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"+ raise LrsError("TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"+
" not a " + str(type(textBlock))) " not a " + unicode_type(type(textBlock)))
if textBlock.parent is None: if textBlock.parent is None:
raise LrsError("TOC text block must be already appended to a page") raise LrsError("TOC text block must be already appended to a page")
@ -746,8 +746,8 @@ class TocLabel(object):
def toElement(self, se): def toElement(self, se):
return ElementWithText("TocLabel", self.label, return ElementWithText("TocLabel", self.label,
refobj=str(self.textBlock.objId), refobj=unicode_type(self.textBlock.objId),
refpage=str(self.textBlock.parent.objId)) refpage=unicode_type(self.textBlock.parent.objId))
class BookInfo(object): class BookInfo(object):
@ -808,7 +808,7 @@ class DocInfo(object):
self.thumbnail = None self.thumbnail = None
self.language = "en" self.language = "en"
self.creator = None self.creator = None
self.creationdate = str(isoformat(date.today())) self.creationdate = unicode_type(isoformat(date.today()))
self.producer = "%s v%s"%(__appname__, __version__) self.producer = "%s v%s"%(__appname__, __version__)
self.numberofpages = "0" self.numberofpages = "0"
@ -832,7 +832,7 @@ class DocInfo(object):
docInfo.append(ElementWithText("Creator", self.creator)) docInfo.append(ElementWithText("Creator", self.creator))
docInfo.append(ElementWithText("CreationDate", self.creationdate)) docInfo.append(ElementWithText("CreationDate", self.creationdate))
docInfo.append(ElementWithText("Producer", self.producer)) docInfo.append(ElementWithText("Producer", self.producer))
docInfo.append(ElementWithText("SumPage", str(self.numberofpages))) docInfo.append(ElementWithText("SumPage", unicode_type(self.numberofpages)))
return docInfo return docInfo
@ -1094,7 +1094,7 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
self.elementName = elementName self.elementName = elementName
self.objectsAppended = False self.objectsAppended = False
# self.label = "%s.%d" % (elementName, self.objId) # self.label = "%s.%d" % (elementName, self.objId)
# self.label = str(self.objId) # self.label = unicode_type(self.objId)
# self.parent = None # self.parent = None
def update(self, settings): def update(self, settings):
@ -1104,11 +1104,11 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
self.attrs[name] = value self.attrs[name] = value
def getLabel(self): def getLabel(self):
return str(self.objId) return unicode_type(self.objId)
def toElement(self, se): def toElement(self, se):
element = Element(self.elementName, stylelabel=self.getLabel(), element = Element(self.elementName, stylelabel=self.getLabel(),
objid=str(self.objId)) objid=unicode_type(self.objId))
element.attrib.update(self.attrs) element.attrib.update(self.attrs)
return element return element
@ -1236,14 +1236,14 @@ class PageStyle(LrsStyle):
del settings[evenbase] del settings[evenbase]
if evenObj.parent is None: if evenObj.parent is None:
parent.append(evenObj) parent.append(evenObj)
settings[evenbase + "id"] = str(evenObj.objId) settings[evenbase + "id"] = unicode_type(evenObj.objId)
if oddbase in settings: if oddbase in settings:
oddObj = settings[oddbase] oddObj = settings[oddbase]
del settings[oddbase] del settings[oddbase]
if oddObj.parent is None: if oddObj.parent is None:
parent.append(oddObj) parent.append(oddObj)
settings[oddbase + "id"] = str(oddObj.objId) settings[oddbase + "id"] = unicode_type(oddObj.objId)
def appendReferencedObjects(self, parent): def appendReferencedObjects(self, parent):
if self.objectsAppended: if self.objectsAppended:
@ -1580,7 +1580,7 @@ class DropCaps(LrsTextTag):
return self.text is None or not self.text.strip() return self.text is None or not self.text.strip()
def toElement(self, se): def toElement(self, se):
elem = Element('DrawChar', line=str(self.line)) elem = Element('DrawChar', line=unicode_type(self.line))
appendTextElements(elem, self.contents, se) appendTextElements(elem, self.contents, se)
return elem return elem
@ -1656,7 +1656,7 @@ class JumpTo(LrsContainer):
self.textBlock = textBlock self.textBlock = textBlock
def toElement(self, se): def toElement(self, se):
return Element("JumpTo", refpage=str(self.textBlock.parent.objId), refobj=str(self.textBlock.objId)) return Element("JumpTo", refpage=unicode_type(self.textBlock.parent.objId), refobj=unicode_type(self.textBlock.objId))
class Plot(LrsSimpleChar1, LrsContainer): class Plot(LrsSimpleChar1, LrsContainer):
@ -1688,8 +1688,8 @@ class Plot(LrsSimpleChar1, LrsContainer):
parent.append(self.obj) parent.append(self.obj)
def toElement(self, se): def toElement(self, se):
elem = Element('Plot', xsize=str(self.xsize), ysize=str(self.ysize), elem = Element('Plot', xsize=unicode_type(self.xsize), ysize=unicode_type(self.ysize),
refobj=str(self.obj.objId)) refobj=unicode_type(self.obj.objId))
if self.adjustment: if self.adjustment:
elem.set('adjustment', self.adjustment) elem.set('adjustment', self.adjustment)
return elem return elem
@ -1771,7 +1771,7 @@ class Space(LrsSimpleChar1, LrsContainer):
if self.xsize == 0: if self.xsize == 0:
return return
return Element("Space", xsize=str(self.xsize)) return Element("Space", xsize=unicode_type(self.xsize))
def toLrfContainer(self, lrfWriter, container): def toLrfContainer(self, lrfWriter, container):
if self.xsize != 0: if self.xsize != 0:
@ -1858,7 +1858,7 @@ class Span(LrsSimpleChar1, LrsContainer):
def toElement(self, se): def toElement(self, se):
element = Element('Span') element = Element('Span')
for (key, value) in self.attrs.items(): for (key, value) in self.attrs.items():
element.set(key, str(value)) element.set(key, unicode_type(value))
appendTextElements(element, self.contents, se) appendTextElements(element, self.contents, se)
return element return element
@ -1871,9 +1871,9 @@ class EmpLine(LrsTextTag, LrsSimpleChar1):
def __init__(self, text=None, emplineposition='before', emplinetype='solid'): def __init__(self, text=None, emplineposition='before', emplinetype='solid'):
LrsTextTag.__init__(self, text, [LrsSimpleChar1]) LrsTextTag.__init__(self, text, [LrsSimpleChar1])
if emplineposition not in self.__class__.emplinepositions: if emplineposition not in self.__class__.emplinepositions:
raise LrsError('emplineposition for an EmpLine must be one of: '+str(self.__class__.emplinepositions)) raise LrsError('emplineposition for an EmpLine must be one of: '+unicode_type(self.__class__.emplinepositions))
if emplinetype not in self.__class__.emplinetypes: if emplinetype not in self.__class__.emplinetypes:
raise LrsError('emplinetype for an EmpLine must be one of: '+str(self.__class__.emplinetypes)) raise LrsError('emplinetype for an EmpLine must be one of: '+unicode_type(self.__class__.emplinetypes))
self.emplinetype = emplinetype self.emplinetype = emplinetype
self.emplineposition = emplineposition self.emplineposition = emplineposition
@ -1933,9 +1933,9 @@ class BlockSpace(LrsContainer):
element = Element("BlockSpace") element = Element("BlockSpace")
if self.xspace != 0: if self.xspace != 0:
element.attrib["xspace"] = str(self.xspace) element.attrib["xspace"] = unicode_type(self.xspace)
if self.yspace != 0: if self.yspace != 0:
element.attrib["yspace"] = str(self.yspace) element.attrib["yspace"] = unicode_type(self.yspace)
return element return element
@ -1979,7 +1979,7 @@ class CharButton(LrsSimpleChar1, LrsContainer):
container.appendLrfTag(LrfTag("CharButtonEnd")) container.appendLrfTag(LrfTag("CharButtonEnd"))
def toElement(self, se): def toElement(self, se):
cb = Element("CharButton", refobj=str(self.button.objId)) cb = Element("CharButton", refobj=unicode_type(self.button.objId))
appendTextElements(cb, self.contents, se) appendTextElements(cb, self.contents, se)
return cb return cb
@ -2081,8 +2081,8 @@ class JumpButton(LrsObject, LrsContainer):
b = self.lrsObjectElement("Button") b = self.lrsObjectElement("Button")
pb = SubElement(b, "PushButton") pb = SubElement(b, "PushButton")
SubElement(pb, "JumpTo", SubElement(pb, "JumpTo",
refpage=str(self.textBlock.parent.objId), refpage=unicode_type(self.textBlock.parent.objId),
refobj=str(self.textBlock.objId)) refobj=unicode_type(self.textBlock.objId))
return b return b
@ -2230,8 +2230,8 @@ class PutObj(LrsContainer):
self.content.objId))) self.content.objId)))
def toElement(self, se): def toElement(self, se):
el = Element("PutObj", x1=str(self.x1), y1=str(self.y1), el = Element("PutObj", x1=unicode_type(self.x1), y1=unicode_type(self.y1),
refobj=str(self.content.objId)) refobj=unicode_type(self.content.objId))
return el return el
@ -2268,9 +2268,8 @@ class ImageStream(LrsObject, LrsContainer):
self.encoding = encoding self.encoding = encoding
def toLrf(self, lrfWriter): def toLrf(self, lrfWriter):
imageFile = open(self.filename, "rb") with open(self.filename, "rb") as f:
imageData = imageFile.read() imageData = f.read()
imageFile.close()
isObj = LrfObject("ImageStream", self.objId) isObj = LrfObject("ImageStream", self.objId)
if self.comment is not None: if self.comment is not None:
@ -2314,9 +2313,9 @@ class Image(LrsObject, LrsContainer, LrsAttributes):
def toElement(self, se): def toElement(self, se):
element = self.lrsObjectElement("Image", **self.attrs) element = self.lrsObjectElement("Image", **self.attrs)
element.set("refstream", str(self.refstream.objId)) element.set("refstream", unicode_type(self.refstream.objId))
for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
element.set(name, str(getattr(self, name))) element.set(name, unicode_type(getattr(self, name)))
return element return element
def toLrf(self, lrfWriter): def toLrf(self, lrfWriter):
@ -2397,9 +2396,9 @@ class ImageBlock(LrsObject, LrsContainer, LrsAttributes):
def toElement(self, se): def toElement(self, se):
element = self.lrsObjectElement("ImageBlock", **self.attrs) element = self.lrsObjectElement("ImageBlock", **self.attrs)
element.set("refstream", str(self.refstream.objId)) element.set("refstream", unicode_type(self.refstream.objId))
for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
element.set(name, str(getattr(self, name))) element.set(name, unicode_type(getattr(self, name)))
element.text = self.alttext element.text = self.alttext
return element return element

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''''' ''''''
@ -5,7 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import struct import struct
from calibre.ebooks.lrf import LRFParseError from calibre.ebooks.lrf import LRFParseError
from polyglot.builtins import unicode_type, string_or_bytes from polyglot.builtins import unicode_type
class Tag(object): class Tag(object):
@ -196,7 +198,7 @@ class Tag(object):
self.id = 0xF500 + tag_id[0] self.id = 0xF500 + tag_id[0]
size, self.name = self.__class__.tags[tag_id[0]] size, self.name = self.__class__.tags[tag_id[0]]
if isinstance(size, string_or_bytes): if isinstance(size, unicode_type):
parser = getattr(self, size + '_parser') parser = getattr(self, size + '_parser')
self.contents = parser(stream) self.contents = parser(stream)
else: else:

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=utf-8 # vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
@ -26,9 +28,9 @@ except:
def string_to_authors(raw): def string_to_authors(raw):
if not raw: if not raw:
return [] return []
raw = raw.replace('&&', u'\uffff') raw = raw.replace('&&', '\uffff')
raw = _author_pat.sub('&', raw) raw = _author_pat.sub('&', raw)
authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')] authors = [a.strip().replace('\uffff', '&') for a in raw.split('&')]
return [a for a in authors if a] return [a for a in authors if a]
@ -41,7 +43,7 @@ def authors_to_string(authors):
def remove_bracketed_text(src, brackets=None): def remove_bracketed_text(src, brackets=None):
if brackets is None: if brackets is None:
brackets = {u'(': u')', u'[': u']', u'{': u'}'} brackets = {'(': ')', '[': ']', '{': '}'}
from collections import Counter from collections import Counter
counts = Counter() counts = Counter()
buf = [] buf = []
@ -56,12 +58,12 @@ def remove_bracketed_text(src, brackets=None):
counts[idx] -= 1 counts[idx] -= 1
elif sum(itervalues(counts)) < 1: elif sum(itervalues(counts)) < 1:
buf.append(char) buf.append(char)
return u''.join(buf) return ''.join(buf)
def author_to_author_sort(author, method=None): def author_to_author_sort(author, method=None):
if not author: if not author:
return u'' return ''
sauthor = remove_bracketed_text(author).strip() sauthor = remove_bracketed_text(author).strip()
tokens = sauthor.split() tokens = sauthor.split()
if len(tokens) < 2: if len(tokens) < 2:
@ -72,13 +74,13 @@ def author_to_author_sort(author, method=None):
ltoks = frozenset(x.lower() for x in tokens) ltoks = frozenset(x.lower() for x in tokens)
copy_words = frozenset(x.lower() for x in tweaks['author_name_copywords']) copy_words = frozenset(x.lower() for x in tweaks['author_name_copywords'])
if ltoks.intersection(copy_words): if ltoks.intersection(copy_words):
method = u'copy' method = 'copy'
if method == u'copy': if method == 'copy':
return author return author
prefixes = {force_unicode(y).lower() for y in tweaks['author_name_prefixes']} prefixes = {force_unicode(y).lower() for y in tweaks['author_name_prefixes']}
prefixes |= {y+u'.' for y in prefixes} prefixes |= {y+'.' for y in prefixes}
while True: while True:
if not tokens: if not tokens:
return author return author
@ -89,9 +91,9 @@ def author_to_author_sort(author, method=None):
break break
suffixes = {force_unicode(y).lower() for y in tweaks['author_name_suffixes']} suffixes = {force_unicode(y).lower() for y in tweaks['author_name_suffixes']}
suffixes |= {y+u'.' for y in suffixes} suffixes |= {y+'.' for y in suffixes}
suffix = u'' suffix = ''
while True: while True:
if not tokens: if not tokens:
return author return author
@ -103,7 +105,7 @@ def author_to_author_sort(author, method=None):
break break
suffix = suffix.strip() suffix = suffix.strip()
if method == u'comma' and u',' in u''.join(tokens): if method == 'comma' and ',' in ''.join(tokens):
return author return author
atokens = tokens[-1:] + tokens[:-1] atokens = tokens[-1:] + tokens[:-1]
@ -111,10 +113,10 @@ def author_to_author_sort(author, method=None):
if suffix: if suffix:
atokens.append(suffix) atokens.append(suffix)
if method != u'nocomma' and num_toks > 1: if method != 'nocomma' and num_toks > 1:
atokens[0] += u',' atokens[0] += ','
return u' '.join(atokens) return ' '.join(atokens)
def authors_to_sort_string(authors): def authors_to_sort_string(authors):
@ -154,7 +156,7 @@ def get_title_sort_pat(lang=None):
return ans return ans
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in _ignore_starts = '\'"'+''.join(codepoint_to_chr(x) for x in
list(range(0x2018, 0x201e))+[0x2032, 0x2033]) list(range(0x2018, 0x201e))+[0x2032, 0x2033])
@ -187,7 +189,7 @@ coding = list(zip(
def roman(num): def roman(num):
if num <= 0 or num >= 4000 or int(num) != num: if num <= 0 or num >= 4000 or int(num) != num:
return str(num) return unicode_type(num)
result = [] result = []
for d, r in coding: for d, r in coding:
while num >= d: while num >= d:
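The roman() helper walks a table of (value, numeral) pairs; only the head of that table, coding = list(zip(, is visible in this hunk. A self-contained sketch using the standard Roman-numeral table (the CODING values below are an assumption, not copied from the file):

    CODING = list(zip(
        [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1],
        ['M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I']))

    def roman(num):
        # out-of-range or non-integer values fall back to plain text
        if num <= 0 or num >= 4000 or int(num) != num:
            return str(num)
        result = []
        for d, r in CODING:
            while num >= d:
                result.append(r)
                num -= d
        return ''.join(result)

    print(roman(1998))  # 'MCMXCVIII'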
@ -202,7 +204,7 @@ def fmt_sidx(i, fmt='%.2f', use_roman=False):
try: try:
i = float(i) i = float(i)
except TypeError: except TypeError:
return str(i) return unicode_type(i)
if int(i) == float(i): if int(i) == float(i):
return roman(int(i)) if use_roman else '%d'%int(i) return roman(int(i)) if use_roman else '%d'%int(i)
return fmt%i return fmt%i
@ -312,7 +314,7 @@ class ResourceCollection(object):
return '[%s]'%', '.join(resources) return '[%s]'%', '.join(resources)
def __repr__(self): def __repr__(self):
return str(self) return unicode_type(self)
def append(self, resource): def append(self, resource):
if not isinstance(resource, Resource): if not isinstance(resource, Resource):
@ -374,7 +376,7 @@ def check_isbn13(isbn):
check = 10 - (sum(products)%10) check = 10 - (sum(products)%10)
if check == 10: if check == 10:
check = 0 check = 0
if str(check) == isbn[12]: if unicode_type(check) == isbn[12]:
return isbn return isbn
except Exception: except Exception:
pass pass
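For context, the comparison above is the tail of the standard ISBN-13 check: the first twelve digits are weighted 1,3,1,3,... and the check digit is whatever brings the weighted sum up to a multiple of ten. A small, self-contained illustration of that rule (not the function from the file):

    def isbn13_check_digit(first_twelve):
        # first_twelve: the first 12 digits of the ISBN as a string
        products = [int(d) * (3 if i % 2 else 1) for i, d in enumerate(first_twelve)]
        check = 10 - (sum(products) % 10)
        return 0 if check == 10 else check

    print(isbn13_check_digit('978316148410'))  # 0, i.e. 978-3-16-148410-0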
@ -430,12 +432,9 @@ def check_doi(doi):
return None return None
def rating_to_stars(value, allow_half_stars=False, star=u'', half=u'½'): def rating_to_stars(value, allow_half_stars=False, star='', half='½'):
r = max(0, min(int(value or 0), 10)) r = max(0, min(int(value or 0), 10))
if allow_half_stars:
ans = star * (r // 2) ans = star * (r // 2)
if r % 2: if allow_half_stars and r % 2:
ans += half ans += half
else:
ans = star * int(r/2.0)
return ans return ans
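The rewritten rating_to_stars() drops the if/else: the whole-star count is always r // 2 and the half glyph is appended only when half stars are allowed and the value is odd, which matches the old behaviour. A quick illustrative check, passing the glyphs explicitly because the defaults are not visible here ('*' and '½' below are stand-ins):

    def rating_to_stars(value, allow_half_stars=False, star='*', half='½'):
        # value is on a 0-10 scale, two points per displayed star
        r = max(0, min(int(value or 0), 10))
        ans = star * (r // 2)
        if allow_half_stars and r % 2:
            ans += half
        return ans

    print(rating_to_stars(7))                         # '***'
    print(rating_to_stars(7, allow_half_stars=True))  # '***½'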

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
@ -10,7 +10,7 @@ import os
from contextlib import closing from contextlib import closing
from calibre.customize import FileTypePlugin from calibre.customize import FileTypePlugin
from polyglot.builtins import filter from polyglot.builtins import filter, unicode_type
def is_comic(list_of_names): def is_comic(list_of_names):
@ -30,7 +30,7 @@ def archive_type(stream):
ans = None ans = None
if id_ == stringFileHeader: if id_ == stringFileHeader:
ans = 'zip' ans = 'zip'
elif id_.startswith('Rar'): elif id_.startswith(b'Rar'):
ans = 'rar' ans = 'rar'
try: try:
stream.seek(pos) stream.seek(pos)
@ -142,7 +142,7 @@ def get_comic_book_info(d, mi, series_index='volume'):
from datetime import date from datetime import date
try: try:
dt = date(puby, 6 if pubm is None else pubm, 15) dt = date(puby, 6 if pubm is None else pubm, 15)
dt = parse_only_date(str(dt)) dt = parse_only_date(unicode_type(dt))
mi.pubdate = dt mi.pubdate = dt
except: except:
pass pass
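The startswith(b'Rar') change above follows a rule applied throughout this commit: data read from a stream opened in binary mode is bytes on Python 3, so any literal it is compared against must be bytes as well. A minimal sketch of the idea, with a hypothetical function name and data rather than the archive_type() from the file:

    from io import BytesIO

    def archive_kind(stream):
        pos = stream.tell()
        head = stream.read(4)        # bytes on Python 3
        stream.seek(pos)
        if head.startswith(b'PK'):   # zip local file header magic
            return 'zip'
        if head.startswith(b'Rar'):  # rar signature
            return 'rar'
        return None

    print(archive_kind(BytesIO(b'Rar!\x1a\x07\x00')))  # 'rar'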

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
@ -24,7 +25,7 @@ SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'})
def human_readable(size, precision=2): def human_readable(size, precision=2):
""" Convert a size in bytes into megabytes """ """ Convert a size in bytes into megabytes """
return ('%.'+str(precision)+'f'+ 'MB') % ((size/(1024.*1024.)),) return ('%.'+unicode_type(precision)+'f'+ 'MB') % (size/(1024*1024),)
NULL_VALUES = { NULL_VALUES = {
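human_readable() can drop the 1024. float literals (and the rating code below can drop /2.0) because the modules touched here now import division from __future__, so / is true division on Python 2 as well. A quick check of the formatting expression under that assumption:

    from __future__ import division

    size, precision = 3 * 1024 * 1024, 2
    print(('%.' + str(precision) + 'f' + 'MB') % (size / (1024 * 1024),))  # '3.00MB'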
@ -607,13 +608,13 @@ class Metadata(object):
return authors_to_string(self.authors) return authors_to_string(self.authors)
def format_tags(self): def format_tags(self):
return u', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)]) return ', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)])
def format_rating(self, v=None, divide_by=1.0): def format_rating(self, v=None, divide_by=1):
if v is None: if v is None:
if self.rating is not None: if self.rating is not None:
return unicode_type(self.rating/divide_by) return unicode_type(self.rating/divide_by)
return u'None' return 'None'
return unicode_type(v/divide_by) return unicode_type(v/divide_by)
def format_field(self, key, series_with_index=True): def format_field(self, key, series_with_index=True):
@ -662,7 +663,7 @@ class Metadata(object):
elif datatype == 'bool': elif datatype == 'bool':
res = _('Yes') if res else _('No') res = _('Yes') if res else _('No')
elif datatype == 'rating': elif datatype == 'rating':
res = u'%.2g'%(res/2.0) res = '%.2g'%(res/2)
elif datatype in ['int', 'float']: elif datatype in ['int', 'float']:
try: try:
fmt = cmeta['display'].get('number_format', None) fmt = cmeta['display'].get('number_format', None)
@ -702,7 +703,7 @@ class Metadata(object):
elif datatype == 'datetime': elif datatype == 'datetime':
res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy')) res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
elif datatype == 'rating': elif datatype == 'rating':
res = u'%.2g'%(res/2.0) res = '%.2g'%(res/2)
elif key == 'size': elif key == 'size':
res = human_readable(res) res = human_readable(res)
return (name, unicode_type(res), orig_res, fmeta) return (name, unicode_type(res), orig_res, fmeta)
@ -719,7 +720,7 @@ class Metadata(object):
ans = [] ans = []
def fmt(x, y): def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y))) ans.append('%-20s: %s'%(unicode_type(x), unicode_type(y)))
fmt('Title', self.title) fmt('Title', self.title)
if self.title_sort: if self.title_sort:
@ -733,14 +734,14 @@ class Metadata(object):
if getattr(self, 'book_producer', False): if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer) fmt('Book Producer', self.book_producer)
if self.tags: if self.tags:
fmt('Tags', u', '.join([unicode_type(t) for t in self.tags])) fmt('Tags', ', '.join([unicode_type(t) for t in self.tags]))
if self.series: if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index()) fmt('Series', self.series + ' #%s'%self.format_series_index())
if not self.is_null('languages'): if not self.is_null('languages'):
fmt('Languages', ', '.join(self.languages)) fmt('Languages', ', '.join(self.languages))
if self.rating is not None: if self.rating is not None:
fmt('Rating', (u'%.2g'%(float(self.rating)/2.0)) if self.rating fmt('Rating', ('%.2g'%(float(self.rating)/2)) if self.rating
else u'') else '')
if self.timestamp is not None: if self.timestamp is not None:
fmt('Timestamp', isoformat(self.timestamp)) fmt('Timestamp', isoformat(self.timestamp))
if self.pubdate is not None: if self.pubdate is not None:
@ -748,7 +749,7 @@ class Metadata(object):
if self.rights is not None: if self.rights is not None:
fmt('Rights', unicode_type(self.rights)) fmt('Rights', unicode_type(self.rights))
if self.identifiers: if self.identifiers:
fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in fmt('Identifiers', ', '.join(['%s:%s'%(k, v) for k, v in
iteritems(self.identifiers)])) iteritems(self.identifiers)]))
if self.comments: if self.comments:
fmt('Comments', self.comments) fmt('Comments', self.comments)
@ -758,7 +759,7 @@ class Metadata(object):
if val: if val:
(name, val) = self.format_field(key) (name, val) = self.format_field(key)
fmt(name, unicode_type(val)) fmt(name, unicode_type(val))
return u'\n'.join(ans) return '\n'.join(ans)
def to_html(self): def to_html(self):
''' '''
@ -772,10 +773,10 @@ class Metadata(object):
ans += [(_('Producer'), unicode_type(self.book_producer))] ans += [(_('Producer'), unicode_type(self.book_producer))]
ans += [(_('Comments'), unicode_type(self.comments))] ans += [(_('Comments'), unicode_type(self.comments))]
ans += [('ISBN', unicode_type(self.isbn))] ans += [('ISBN', unicode_type(self.isbn))]
ans += [(_('Tags'), u', '.join([unicode_type(t) for t in self.tags]))] ans += [(_('Tags'), ', '.join([unicode_type(t) for t in self.tags]))]
if self.series: if self.series:
ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())] ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())]
ans += [(_('Languages'), u', '.join(self.languages))] ans += [(_('Languages'), ', '.join(self.languages))]
if self.timestamp is not None: if self.timestamp is not None:
ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))] ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))]
if self.pubdate is not None: if self.pubdate is not None:
@ -788,8 +789,8 @@ class Metadata(object):
(name, val) = self.format_field(key) (name, val) = self.format_field(key)
ans += [(name, val)] ans += [(name, val)]
for i, x in enumerate(ans): for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x ans[i] = '<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans) return '<table>%s</table>'%'\n'.join(ans)
if ispy3: if ispy3:
__str__ = __unicode__representation__ __str__ = __unicode__representation__

View File

@ -1,9 +1,10 @@
from __future__ import absolute_import, division, print_function, unicode_literals
''' '''
Created on 4 Jun 2010 Created on 4 Jun 2010
@author: charles @author: charles
''' '''
from __future__ import print_function
import json, traceback import json, traceback
from datetime import datetime, time from datetime import datetime, time

View File

@ -1,4 +1,5 @@
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
@ -95,14 +96,14 @@ def config():
def filetypes(): def filetypes():
readers = set([]) readers = set()
for r in metadata_readers(): for r in metadata_readers():
readers = readers.union(set(r.file_types)) readers = readers.union(set(r.file_types))
return readers return readers
def option_parser(): def option_parser():
writers = set([]) writers = set()
for w in metadata_writers(): for w in metadata_writers():
writers = writers.union(set(w.file_types)) writers = writers.union(set(w.file_types))
ft, w = ', '.join(sorted(filetypes())), ', '.join(sorted(writers)) ft, w = ', '.join(sorted(filetypes())), ', '.join(sorted(writers))

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from __future__ import print_function, with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
''' '''
Read meta information from eReader pdb files. Read meta information from eReader pdb files.
@ -24,7 +25,7 @@ def get_cover(pheader, eheader):
for i in range(eheader.image_count): for i in range(eheader.image_count):
raw = pheader.section_data(eheader.image_data_offset + i) raw = pheader.section_data(eheader.image_data_offset + i)
if raw[4:4 + 32].strip('\x00') == 'cover.png': if raw[4:4 + 32].strip(b'\x00') == b'cover.png':
cover_data = raw[62:] cover_data = raw[62:]
break break
@ -48,7 +49,7 @@ def get_metadata(stream, extract_cover=True):
try: try:
mdata = pheader.section_data(hr.metadata_offset) mdata = pheader.section_data(hr.metadata_offset)
mdata = mdata.split('\x00') mdata = mdata.decode('utf-8').split('\x00')
mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0]) mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])] mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3]) mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
@ -80,7 +81,7 @@ def set_metadata(stream, mi):
# Create a metadata record for the file if one does not alreay exist # Create a metadata record for the file if one does not alreay exist
if not hr.has_metadata: if not hr.has_metadata:
sections += ['', 'MeTaInFo\x00'] sections += [b'', b'MeTaInFo\x00']
last_data = len(sections) - 1 last_data = len(sections) - 1
for i in range(0, 132, 2): for i in range(0, 132, 2):
@ -95,8 +96,8 @@ def set_metadata(stream, mi):
# Merge the metadata into the file # Merge the metadata into the file
file_mi = get_metadata(stream, False) file_mi = get_metadata(stream, False)
file_mi.smart_update(mi) file_mi.smart_update(mi)
sections[hr.metadata_offset] = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \ sections[hr.metadata_offset] = ('%s\x00%s\x00%s\x00%s\x00%s\x00' % \
(file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn) (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn)).encode('utf-8')
# Rebuild the PDB wrapper because the offsets have changed due to the # Rebuild the PDB wrapper because the offsets have changed due to the
# new metadata. # new metadata.

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
@ -63,7 +64,8 @@ def set_metadata(stream, mi):
raise Exception('no cover') raise Exception('no cover')
except: except:
try: try:
new_cdata = open(mi.cover, 'rb').read() with open(mi.cover, 'rb') as f:
new_cdata = f.read()
except: except:
pass pass
if new_cdata: if new_cdata:
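This hunk, like several others in the commit, replaces a bare open(...).read() with a context manager so the file handle is closed as soon as the data is read rather than whenever the object happens to be garbage collected. The pattern in isolation, as a generic helper with a placeholder path argument:

    def read_binary(path):
        # replaces: return open(path, 'rb').read()
        with open(path, 'rb') as f:   # handle is closed when the block exits
            return f.read()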

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=utf-8 # vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>, '\ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>, '\
'2008, Anatoly Shipitsin <norguhtar at gmail.com>' '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
@ -255,8 +257,8 @@ def _parse_tags(root, mi, ctx):
def _parse_series(root, mi, ctx): def _parse_series(root, mi, ctx):
# calibri supports only 1 series: use the 1-st one # calibre supports only 1 series: use the 1-st one
# pick up sequence but only from 1 secrion in preferred order # pick up sequence but only from 1 section in preferred order
# except <src-title-info> # except <src-title-info>
xp_ti = '//fb:title-info/fb:sequence[1]' xp_ti = '//fb:title-info/fb:sequence[1]'
xp_pi = '//fb:publish-info/fb:sequence[1]' xp_pi = '//fb:publish-info/fb:sequence[1]'
@ -283,7 +285,7 @@ def _parse_isbn(root, mi, ctx):
def _parse_comments(root, mi, ctx): def _parse_comments(root, mi, ctx):
# pick up annotation but only from 1 secrion <title-info>; fallback: <src-title-info> # pick up annotation but only from 1 section <title-info>; fallback: <src-title-info>
for annotation_sec in ['title-info', 'src-title-info']: for annotation_sec in ['title-info', 'src-title-info']:
elms_annotation = ctx.XPath('//fb:%s/fb:annotation' % annotation_sec)(root) elms_annotation = ctx.XPath('//fb:%s/fb:annotation' % annotation_sec)(root)
if elms_annotation: if elms_annotation:

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
''' '''
Read meta information from Haodoo.net pdb files. Read meta information from Haodoo.net pdb files.

View File

@ -1,4 +1,5 @@
from __future__ import print_function from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>' __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from IMP files''' '''Read meta information from IMP files'''
@ -8,7 +9,7 @@ import sys
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG'] MAGIC = [b'\x00\x01BOOKDOUG', b'\x00\x02BOOKDOUG']
def get_metadata(stream): def get_metadata(stream):
@ -18,18 +19,18 @@ def get_metadata(stream):
stream.seek(0) stream.seek(0)
try: try:
if stream.read(10) not in MAGIC: if stream.read(10) not in MAGIC:
print(u'Couldn\'t read IMP header from file', file=sys.stderr) print('Couldn\'t read IMP header from file', file=sys.stderr)
return mi return mi
def cString(skip=0): def cString(skip=0):
result = '' result = b''
while 1: while 1:
data = stream.read(1) data = stream.read(1)
if data == '\x00': if data == b'\x00':
if not skip: if not skip:
return result return result.decode('utf-8')
skip -= 1 skip -= 1
result, data = '', '' result, data = b'', b''
result += data result += data
stream.read(38) # skip past some uninteresting headers stream.read(38) # skip past some uninteresting headers
@ -44,6 +45,6 @@ def get_metadata(stream):
if category: if category:
mi.category = category mi.category = category
except Exception as err: except Exception as err:
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err)) msg = 'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode_type(err))
print(msg.encode('utf8'), file=sys.stderr) print(msg.encode('utf8'), file=sys.stderr)
return mi return mi
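The cString() helper above now accumulates bytes and decodes only on return, since stream.read(1) yields items like b'\x00' rather than '\x00' under Python 3. A standalone sketch of the same idea, without the skip parameter of the original:

    from io import BytesIO

    def read_cstring(stream):
        # collect bytes up to (not including) the NUL terminator, then decode
        buf = b''
        while True:
            ch = stream.read(1)
            if ch == b'\x00' or not ch:
                return buf.decode('utf-8')
            buf += ch

    print(read_cstring(BytesIO(b'BOOKDOUG\x00rest')))  # 'BOOKDOUG'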

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import print_function
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
@ -20,7 +20,7 @@ from polyglot.urllib import parse_qs, quote_plus
URL = \ URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping=" "http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
_ignore_starts = u'\'"'+u''.join(codepoint_to_chr(x) for x in list(range(0x2018, 0x201e))+[0x2032, 0x2033]) _ignore_starts = '\'"'+''.join(codepoint_to_chr(x) for x in list(range(0x2018, 0x201e))+[0x2032, 0x2033])
def get_series(title, authors, timeout=60): def get_series(title, authors, timeout=60):

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
@ -39,7 +41,7 @@ def get_metadata(f):
read = lambda at, amount: _read(f, at, amount) read = lambda at, amount: _read(f, at, amount)
f.seek(0) f.seek(0)
buf = f.read(12) buf = f.read(12)
if buf[4:] == 'ftypLRX2': if buf[4:] == b'ftypLRX2':
offset = 0 offset = 0
while True: while True:
offset += word_be(buf[:4]) offset += word_be(buf[:4])
@ -47,7 +49,7 @@ def get_metadata(f):
buf = read(offset, 8) buf = read(offset, 8)
except: except:
raise ValueError('Not a valid LRX file') raise ValueError('Not a valid LRX file')
if buf[4:] == 'bbeb': if buf[4:] == b'bbeb':
break break
offset += 8 offset += 8
buf = read(offset, 16) buf = read(offset, 16)
@ -80,8 +82,7 @@ def get_metadata(f):
mi.language = root.find('DocInfo').find('Language').text mi.language = root.find('DocInfo').find('Language').text
return mi return mi
elif buf[4:8] == 'LRX': elif buf[4:8] == b'LRX':
raise ValueError('Librie LRX format not supported') raise ValueError('Librie LRX format not supported')
else: else:
raise ValueError('Not a LRX file') raise ValueError('Not a LRX file')

View File

@ -1,4 +1,5 @@
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -10,7 +11,7 @@ from calibre.ebooks.metadata.opf2 import OPF
from calibre import isbytestring from calibre import isbytestring
from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from polyglot.builtins import getcwd from polyglot.builtins import getcwd, unicode_type
_METADATA_PRIORITIES = [ _METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm', 'html', 'htm', 'xhtml', 'xhtm',
@ -214,7 +215,8 @@ def opf_metadata(opfpath):
cpath = os.path.join(os.path.dirname(opfpath), opf.cover) cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
if os.access(cpath, os.R_OK): if os.access(cpath, os.R_OK):
fmt = cpath.rpartition('.')[-1] fmt = cpath.rpartition('.')[-1]
data = open(cpath, 'rb').read() with open(cpath, 'rb') as f:
data = f.read()
mi.cover_data = (fmt, data) mi.cover_data = (fmt, data)
return mi return mi
except: except:
@ -230,7 +232,7 @@ def forked_read_metadata(path, tdir):
f.seek(0, 2) f.seek(0, 2)
sz = f.tell() sz = f.tell()
with lopen(os.path.join(tdir, 'size.txt'), 'wb') as s: with lopen(os.path.join(tdir, 'size.txt'), 'wb') as s:
s.write(str(sz).encode('ascii')) s.write(unicode_type(sz).encode('ascii'))
f.seek(0) f.seek(0)
mi = get_metadata(f, fmt) mi = get_metadata(f, fmt)
if mi.cover_data and mi.cover_data[1]: if mi.cover_data and mi.cover_data[1]:

View File

@ -2,8 +2,7 @@
Retrieve and modify in-place Mobipocket book metadata. Retrieve and modify in-place Mobipocket book metadata.
''' '''
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import print_function
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
@ -398,11 +397,11 @@ class MetadataUpdater(object):
self.original_exth_records.get(501, None) == 'EBOK' and self.original_exth_records.get(501, None) == 'EBOK' and
not added_501 and not share_not_sync): not added_501 and not share_not_sync):
from uuid import uuid4 from uuid import uuid4
update_exth_record((113, str(uuid4()))) update_exth_record((113, unicode_type(uuid4())))
# Add a 112 record with actual UUID # Add a 112 record with actual UUID
if getattr(mi, 'uuid', None): if getattr(mi, 'uuid', None):
update_exth_record((112, update_exth_record((112,
(u"calibre:%s" % mi.uuid).encode(self.codec, 'replace'))) ("calibre:%s" % mi.uuid).encode(self.codec, 'replace')))
if 503 in self.original_exth_records: if 503 in self.original_exth_records:
update_exth_record((503, mi.title.encode(self.codec, 'replace'))) update_exth_record((503, mi.title.encode(self.codec, 'replace')))
@ -444,7 +443,10 @@ class MetadataUpdater(object):
if mi.cover_data[1] or mi.cover: if mi.cover_data[1] or mi.cover:
try: try:
data = mi.cover_data[1] if mi.cover_data[1] else open(mi.cover, 'rb').read() data = mi.cover_data[1]
if not data:
with open(mi.cover, 'rb') as f:
data = f.read()
except: except:
pass pass
else: else:

View File

@ -1,5 +1,4 @@
#!/usr/bin/python2 #!/usr/bin/python2
# -*- coding: utf-8 -*-
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
# #
# Copyright (C) 2006 Søren Roug, European Environment Agency # Copyright (C) 2006 Søren Roug, European Environment Agency
@ -19,7 +18,7 @@
# #
# Contributor(s): # Contributor(s):
# #
from __future__ import division from __future__ import absolute_import, division, print_function, unicode_literals
import zipfile, re, io import zipfile, re, io
import xml.sax.saxutils import xml.sax.saxutils
@ -37,32 +36,32 @@ from polyglot.builtins import string_or_bytes
whitespace = re.compile(r'\s+') whitespace = re.compile(r'\s+')
fields = { fields = {
'title': (DCNS,u'title'), 'title': (DCNS, 'title'),
'description': (DCNS,u'description'), 'description': (DCNS, 'description'),
'subject': (DCNS,u'subject'), 'subject': (DCNS, 'subject'),
'creator': (DCNS,u'creator'), 'creator': (DCNS, 'creator'),
'date': (DCNS,u'date'), 'date': (DCNS, 'date'),
'language': (DCNS,u'language'), 'language': (DCNS, 'language'),
'generator': (METANS,u'generator'), 'generator': (METANS, 'generator'),
'initial-creator': (METANS,u'initial-creator'), 'initial-creator': (METANS, 'initial-creator'),
'keyword': (METANS,u'keyword'), 'keyword': (METANS, 'keyword'),
'editing-duration': (METANS,u'editing-duration'), 'editing-duration': (METANS, 'editing-duration'),
'editing-cycles': (METANS,u'editing-cycles'), 'editing-cycles': (METANS, 'editing-cycles'),
'printed-by': (METANS,u'printed-by'), 'printed-by': (METANS, 'printed-by'),
'print-date': (METANS,u'print-date'), 'print-date': (METANS, 'print-date'),
'creation-date': (METANS,u'creation-date'), 'creation-date': (METANS, 'creation-date'),
'user-defined': (METANS,u'user-defined'), 'user-defined': (METANS, 'user-defined'),
# 'template': (METANS,u'template'), # 'template': (METANS, 'template'),
} }
def normalize(str): def normalize(s):
""" """
The normalize-space function returns the argument string with whitespace The normalize-space function returns the argument string with whitespace
normalized by stripping leading and trailing whitespace and replacing normalized by stripping leading and trailing whitespace and replacing
sequences of whitespace characters by a single space. sequences of whitespace characters by a single space.
""" """
return whitespace.sub(' ', str).strip() return whitespace.sub(' ', s).strip()
class MetaCollector: class MetaCollector:
@ -75,9 +74,9 @@ class MetaCollector:
self._content = [] self._content = []
self.dowrite = True self.dowrite = True
def write(self, str): def write(self, s):
if self.dowrite: if self.dowrite:
self._content.append(str) self._content.append(s)
def content(self): def content(self):
return ''.join(self._content) return ''.join(self._content)
@ -107,8 +106,8 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
# location and not at the end # location and not at the end
# if name == (METANS,u'template'): # if name == (METANS,u'template'):
# self._data = [attrs.get((XLINKNS,u'title'),'')] # self._data = [attrs.get((XLINKNS,u'title'),'')]
if name == (METANS,u'user-defined'): if name == (METANS, 'user-defined'):
field = attrs.get((METANS,u'name')) field = attrs.get((METANS, 'name'))
if field in self.deletefields: if field in self.deletefields:
self.output.dowrite = False self.output.dowrite = False
elif field in self.yieldfields: elif field in self.yieldfields:
@ -120,15 +119,15 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
def endElementNS(self, name, qname): def endElementNS(self, name, qname):
field = name field = name
if name == (METANS,u'user-defined'): if name == (METANS, 'user-defined'):
field = self._tag field = self._tag
if name == (OFFICENS,u'meta'): if name == (OFFICENS, 'meta'):
for k,v in self.addfields.items(): for k,v in self.addfields.items():
if len(v) > 0: if len(v) > 0:
if isinstance(k, string_or_bytes): if isinstance(k, string_or_bytes):
xml.sax.saxutils.XMLGenerator.startElementNS(self,(METANS,u'user-defined'),None,{(METANS,u'name'):k}) xml.sax.saxutils.XMLGenerator.startElementNS(self,(METANS, 'user-defined'),None,{(METANS, 'name'):k})
xml.sax.saxutils.XMLGenerator.characters(self, v) xml.sax.saxutils.XMLGenerator.characters(self, v)
xml.sax.saxutils.XMLGenerator.endElementNS(self, (METANS,u'user-defined'),None) xml.sax.saxutils.XMLGenerator.endElementNS(self, (METANS, 'user-defined'),None)
else: else:
xml.sax.saxutils.XMLGenerator.startElementNS(self, k, None, {}) xml.sax.saxutils.XMLGenerator.startElementNS(self, k, None, {})
xml.sax.saxutils.XMLGenerator.characters(self, v) xml.sax.saxutils.XMLGenerator.characters(self, v)
@ -140,7 +139,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
self.seenfields[texttag] = self.data() self.seenfields[texttag] = self.data()
# OpenOffice has the habit to capitalize custom properties, so we add a # OpenOffice has the habit to capitalize custom properties, so we add a
# lowercase version for easy access # lowercase version for easy access
if texttag[:4].lower() == u'opf.': if texttag[:4].lower() == 'opf.':
self.seenfields[texttag.lower()] = self.data() self.seenfields[texttag.lower()] = self.data()
if field in self.deletefields: if field in self.deletefields:
@ -245,7 +244,7 @@ def read_cover(stream, zin, mi, opfmeta, extract_cover):
except Exception: except Exception:
continue continue
imgnum += 1 imgnum += 1
if opfmeta and frm.getAttribute('name').lower() == u'opf.cover': if opfmeta and frm.getAttribute('name').lower() == 'opf.cover':
cover_href = i_href cover_href = i_href
cover_data = (fmt, raw) cover_data = (fmt, raw)
cover_frame = frm.getAttribute('name') # could have upper case cover_frame = frm.getAttribute('name') # could have upper case

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
''' '''
Read meta information from pdb files. Read meta information from pdb files.
@ -59,4 +60,3 @@ def set_metadata(stream, mi):
stream.seek(0) stream.seek(0)
stream.write('%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', mi.title).ljust(31, '\x00')[:31].encode('ascii', 'replace')) stream.write('%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', mi.title).ljust(31, '\x00')[:31].encode('ascii', 'replace'))

View File

@ -1,4 +1,5 @@
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files''' '''Read meta information from PDF files'''
@ -47,9 +48,9 @@ def read_info(outputdir, get_cover):
return None return None
for line in info_raw.splitlines(): for line in info_raw.splitlines():
if u':' not in line: if ':' not in line:
continue continue
field, val = line.partition(u':')[::2] field, val = line.partition(':')[::2]
val = val.strip() val = val.strip()
if field and val: if field and val:
ans[field] = val.strip() ans[field] = val.strip()

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -24,7 +25,7 @@ def get_metadata(stream, extract_cover=True):
mi = MetaInformation(_('Unknown'), [_('Unknown')]) mi = MetaInformation(_('Unknown'), [_('Unknown')])
stream.seek(0) stream.seek(0)
pml = '' pml = b''
if stream.name.endswith('.pmlz'): if stream.name.endswith('.pmlz'):
with TemporaryDirectory('_unpmlz') as tdir: with TemporaryDirectory('_unpmlz') as tdir:
zf = ZipFile(stream) zf = ZipFile(stream)
@ -41,22 +42,22 @@ def get_metadata(stream, extract_cover=True):
if extract_cover: if extract_cover:
mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name))) mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
for comment in re.findall(r'(?mus)\\v.*?\\v', pml): for comment in re.findall(br'(?ms)\\v.*?\\v', pml):
m = re.search(r'TITLE="(.*?)"', comment) m = re.search(br'TITLE="(.*?)"', comment)
if m: if m:
mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
m = re.search(r'AUTHOR="(.*?)"', comment) m = re.search(br'AUTHOR="(.*?)"', comment)
if m: if m:
if mi.authors == [_('Unknown')]: if mi.authors == [_('Unknown')]:
mi.authors = [] mi.authors = []
mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))) mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
m = re.search(r'PUBLISHER="(.*?)"', comment) m = re.search(br'PUBLISHER="(.*?)"', comment)
if m: if m:
mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
m = re.search(r'COPYRIGHT="(.*?)"', comment) m = re.search(br'COPYRIGHT="(.*?)"', comment)
if m: if m:
mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
m = re.search(r'ISBN="(.*?)"', comment) m = re.search(br'ISBN="(.*?)"', comment)
if m: if m:
mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
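The regex changes above switch to bytes patterns because the PML buffer is now kept as bytes; the old (?mus) flag set loses its u, since the unicode flag is not allowed on a bytes pattern in Python 3, and each captured group is decoded explicitly. A minimal sketch with made-up PML content:

    import re

    pml = b'\\v TITLE="A Book" AUTHOR="Someone" \\v body text'
    for comment in re.findall(br'(?ms)\\v.*?\\v', pml):
        m = re.search(br'TITLE="(.*?)"', comment)
        if m:
            print(m.group(1).decode('cp1252', 'replace'))  # 'A Book'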

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
@ -34,5 +36,3 @@ def get_metadata(stream):
mi.timestamp = None mi.timestamp = None
return mi return mi
raise ValueError('No ebook found in RAR archive') raise ValueError('No ebook found in RAR archive')

View File

@ -1,4 +1,5 @@
from __future__ import print_function from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>' __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from RB files''' '''Read meta information from RB files'''
@ -19,7 +20,7 @@ def get_metadata(stream):
stream.seek(0) stream.seek(0)
try: try:
if not stream.read(14) == MAGIC: if not stream.read(14) == MAGIC:
print(u'Couldn\'t read RB header from file', file=sys.stderr) print('Couldn\'t read RB header from file', file=sys.stderr)
return mi return mi
stream.read(10) stream.read(10)
@ -34,7 +35,7 @@ def get_metadata(stream):
if flag == 2: if flag == 2:
break break
else: else:
print(u'Couldn\'t find INFO from RB file', file=sys.stderr) print('Couldn\'t find INFO from RB file', file=sys.stderr)
return mi return mi
stream.seek(offset) stream.seek(offset)
@ -48,7 +49,7 @@ def get_metadata(stream):
elif key.strip() == 'AUTHOR': elif key.strip() == 'AUTHOR':
mi.authors = string_to_authors(value) mi.authors = string_to_authors(value)
except Exception as err: except Exception as err:
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err)) msg = 'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode_type(err))
prints(msg, file=sys.stderr) prints(msg, file=sys.stderr)
raise raise
return mi return mi

View File

@ -1,6 +1,6 @@
'''Read meta information from SNB files''' '''Read meta information from SNB files'''
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>' __copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'

View File

@ -1,9 +1,7 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'

View File

@ -208,18 +208,18 @@ class Worker(Thread): # Get details {{{
12: ['diciembre'], 12: ['diciembre'],
}, },
'jp': { 'jp': {
1: [u'1月'], 1: ['1月'],
2: [u'2月'], 2: ['2月'],
3: [u'3月'], 3: ['3月'],
4: [u'4月'], 4: ['4月'],
5: [u'5月'], 5: ['5月'],
6: [u'6月'], 6: ['6月'],
7: [u'7月'], 7: ['7月'],
8: [u'8月'], 8: ['8月'],
9: [u'9月'], 9: ['9月'],
10: [u'10月'], 10: ['10月'],
11: [u'11月'], 11: ['11月'],
12: [u'12月'], 12: ['12月'],
}, },
'nl': { 'nl': {
1: ['januari'], 2: ['februari'], 3: ['maart'], 5: ['mei'], 6: ['juni'], 7: ['juli'], 8: ['augustus'], 10: ['oktober'], 1: ['januari'], 2: ['februari'], 3: ['maart'], 5: ['mei'], 6: ['juni'], 7: ['juli'], 8: ['augustus'], 10: ['oktober'],
@ -294,10 +294,10 @@ class Worker(Thread): # Get details {{{
'ita': ('Italian', 'Italiano'), 'ita': ('Italian', 'Italiano'),
'deu': ('German', 'Deutsch'), 'deu': ('German', 'Deutsch'),
'spa': ('Spanish', 'Espa\xf1ol', 'Espaniol'), 'spa': ('Spanish', 'Espa\xf1ol', 'Espaniol'),
'jpn': ('Japanese', u'日本語'), 'jpn': ('Japanese', '日本語'),
'por': ('Portuguese', 'Português'), 'por': ('Portuguese', 'Português'),
'nld': ('Dutch', 'Nederlands',), 'nld': ('Dutch', 'Nederlands',),
'chs': ('Chinese', u'中文', u'简体中文'), 'chs': ('Chinese', '中文', '简体中文'),
} }
self.lang_map = {} self.lang_map = {}
for code, names in lm.items(): for code, names in lm.items():
@ -351,7 +351,7 @@ class Worker(Thread): # Get details {{{
if self.testing: if self.testing:
import tempfile import tempfile
import uuid import uuid
with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4())) + '_', with tempfile.NamedTemporaryFile(prefix=(asin or type('')(uuid.uuid4())) + '_',
suffix='.html', delete=False) as f: suffix='.html', delete=False) as f:
f.write(raw) f.write(raw)
print('Downloaded html for', asin, 'saved in', f.name) print('Downloaded html for', asin, 'saved in', f.name)
@ -1134,9 +1134,9 @@ class Amazon(Source):
# magic parameter to enable Japanese Shift_JIS encoding. # magic parameter to enable Japanese Shift_JIS encoding.
if domain == 'jp': if domain == 'jp':
q['__mk_ja_JP'] = u'カタカナ' q['__mk_ja_JP'] = 'カタカナ'
if domain == 'nl': if domain == 'nl':
q['__mk_nl_NL'] = u'ÅMÅŽÕÑ' q['__mk_nl_NL'] = 'ÅMÅŽÕÑ'
if 'field-keywords' not in q: if 'field-keywords' not in q:
q['field-keywords'] = '' q['field-keywords'] = ''
for f in 'field-isbn field-title field-author'.split(): for f in 'field-isbn field-title field-author'.split():
@ -1604,18 +1604,18 @@ def manual_tests(domain, **kw): # {{{
all_tests['jp'] = [ # {{{ all_tests['jp'] = [ # {{{
( # Adult filtering test ( # Adult filtering test
{'identifiers': {'isbn': '4799500066'}}, {'identifiers': {'isbn': '4799500066'}},
[title_test(u' '), ] [title_test(' '), ]
), ),
( # isbn -> title, authors ( # isbn -> title, authors
{'identifiers': {'isbn': '9784101302720'}}, {'identifiers': {'isbn': '9784101302720'}},
[title_test(u'精霊の守り人', [title_test('精霊の守り人',
exact=True), authors_test([u'上橋 菜穂子']) exact=True), authors_test(['上橋 菜穂子'])
] ]
), ),
( # title, authors -> isbn (will use Shift_JIS encoding in query.) ( # title, authors -> isbn (will use Shift_JIS encoding in query.)
{'title': u'考えない練習', {'title': '考えない練習',
'authors': [u'小池 龍之介']}, 'authors': ['小池 龍之介']},
[isbn_test('9784093881067'), ] [isbn_test('9784093881067'), ]
), ),
] # }}} ] # }}}

View File

@ -139,7 +139,7 @@ def get_basic_data(browser, log, *skus):
'orderID': '0', 'orderID': '0',
'mailingID': '', 'mailingID': '',
'tContentWidth': '926', 'tContentWidth': '926',
'originalOrder': ','.join(str(i) for i in range(len(skus))), 'originalOrder': ','.join(type('')(i) for i in range(len(skus))),
'selectedOrderID': '0', 'selectedOrderID': '0',
'selectedSortColumn': '0', 'selectedSortColumn': '0',
'listType': '1', 'listType': '1',
@ -255,7 +255,7 @@ class Edelweiss(Source):
return None return None
params = { params = {
'q': (' '.join(keywords)).encode('utf-8'), 'q': (' '.join(keywords)).encode('utf-8'),
'_': str(int(time.time())) '_': type('')(int(time.time()))
} }
return BASE_URL+urlencode(params) return BASE_URL+urlencode(params)
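The type('')(…) calls above are a compact way to name the native text type without importing anything: with unicode_literals in effect, '' is unicode on Python 2 and str on Python 3, so type('') converts values to text on either interpreter. Illustration:

    from __future__ import unicode_literals

    text_type = type('')      # unicode on Python 2, str on Python 3
    print(text_type(1561))    # '1561', text either way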

View File

@ -121,7 +121,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
# ISBN # ISBN
isbns = [] isbns = []
for x in identifier(extra): for x in identifier(extra):
t = str(x.text).strip() t = type('')(x.text).strip()
if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'): if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
if t[:5].upper() == 'ISBN:': if t[:5].upper() == 'ISBN:':
t = check_isbn(t[5:]) t = check_isbn(t[5:])
@ -220,12 +220,10 @@ class GoogleBooks(Source):
if author_tokens: if author_tokens:
q += ('+' if q else '') + build_term('author', author_tokens) q += ('+' if q else '') + build_term('author', author_tokens)
if isinstance(q, type(u'')):
q = q.encode('utf-8')
if not q: if not q:
return None return None
return BASE_URL + urlencode({ return BASE_URL + urlencode({
'q': q, 'q': q.encode('utf-8'),
'max-results': 20, 'max-results': 20,
'start-index': 1, 'start-index': 1,
'min-viewability': 'none', 'min-viewability': 'none',

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from __future__ import print_function from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid at kovidgoyal.net>'
@ -69,7 +70,7 @@ class TOC(list):
def __str__(self): def __str__(self):
lines = ['TOC: %s#%s %s'%(self.href, self.fragment, self.text)] lines = ['TOC: %s#%s %s'%(self.href, self.fragment, self.text)]
for child in self: for child in self:
c = str(child).splitlines() c = unicode_type(child).splitlines()
for l in c: for l in c:
lines.append('\t'+l) lines.append('\t'+l)
return '\n'.join(lines) return '\n'.join(lines)
@ -179,7 +180,8 @@ class TOC(list):
def read_ncx_toc(self, toc, root=None): def read_ncx_toc(self, toc, root=None):
self.base_path = os.path.dirname(toc) self.base_path = os.path.dirname(toc)
if root is None: if root is None:
raw = xml_to_unicode(open(toc, 'rb').read(), assume_utf8=True, with open(toc, 'rb') as f:
raw = xml_to_unicode(f.read(), assume_utf8=True,
strip_encoding_pats=True)[0] strip_encoding_pats=True)[0]
root = etree.fromstring(raw, parser=etree.XMLParser(recover=True, root = etree.fromstring(raw, parser=etree.XMLParser(recover=True,
no_network=True)) no_network=True))
@ -233,7 +235,9 @@ class TOC(list):
def read_html_toc(self, toc): def read_html_toc(self, toc):
self.base_path = os.path.dirname(toc) self.base_path = os.path.dirname(toc)
for href, fragment, txt in parse_html_toc(lopen(toc, 'rb').read()): with lopen(toc, 'rb') as f:
parsed_toc = parse_html_toc(f.read())
for href, fragment, txt in parsed_toc:
add = True add = True
for i in self.flat(): for i in self.flat():
if i.href == href and i.fragment == fragment: if i.href == href and i.fragment == fragment:
@ -245,8 +249,8 @@ class TOC(list):
def render(self, stream, uid): def render(self, stream, uid):
root = E.ncx( root = E.ncx(
E.head( E.head(
E.meta(name='dtb:uid', content=str(uid)), E.meta(name='dtb:uid', content=unicode_type(uid)),
E.meta(name='dtb:depth', content=str(self.depth())), E.meta(name='dtb:depth', content=unicode_type(self.depth())),
E.meta(name='dtb:generator', content='%s (%s)'%(__appname__, E.meta(name='dtb:generator', content='%s (%s)'%(__appname__,
__version__)), __version__)),
E.meta(name='dtb:totalPageCount', content='0'), E.meta(name='dtb:totalPageCount', content='0'),
@ -271,7 +275,7 @@ class TOC(list):
E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment)) E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment))
if np.fragment else '')), if np.fragment else '')),
id=item_id, id=item_id,
playOrder=str(np.play_order) playOrder=unicode_type(np.play_order)
) )
au = getattr(np, 'author', None) au = getattr(np, 'author', None)
if au: if au:

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import print_function
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'

View File

@ -1,4 +1,5 @@
from __future__ import with_statement from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'