Sync to trunk.

This commit is contained in:
John Schember 2012-03-27 18:57:42 -04:00
commit fc10b88ba3
22 changed files with 416 additions and 135 deletions

View File

@ -1,8 +1,9 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011' __copyright__ = '2012'
''' '''
lemonde.fr lemonde.fr
''' '''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class LeMonde(BasicNewsRecipe): class LeMonde(BasicNewsRecipe):
@ -24,7 +25,7 @@ class LeMonde(BasicNewsRecipe):
.ariane{font-size:xx-small;} .ariane{font-size:xx-small;}
.source{font-size:xx-small;} .source{font-size:xx-small;}
#.href{font-size:xx-small;} #.href{font-size:xx-small;}
.LM_caption{color:#666666; font-size:x-small;} #.figcaption style{color:#666666; font-size:x-small;}
#.main-article-info{font-family:Arial,Helvetica,sans-serif;} #.main-article-info{font-family:Arial,Helvetica,sans-serif;}
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} #full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
@ -40,8 +41,88 @@ class LeMonde(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
auto_cleanup = True filterDuplicates = True
def preprocess_html(self, soup):
    """Replace every ``<a>`` element that has a single string child with
    that bare string, so the link markup disappears but its text stays.

    Anchors whose ``string`` attribute is None are left untouched.
    Returns the same ``soup`` object, modified in place.
    """
    for anchor in soup.findAll('a'):
        text = anchor.string
        if text is None:
            continue
        anchor.replaceWith(text)
    return soup
preprocess_regexps = [
(re.compile(r'([0-9])%'), lambda m: m.group(1) + ' %'),
(re.compile(r'([0-9])([0-9])([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + m.group(2) + m.group(3) + ' ' + m.group(4) + m.group(5) + m.group(6)),
(re.compile(r'([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + ' ' + m.group(2) + m.group(3) + m.group(4)),
(re.compile(r'<span>'), lambda match: ' <span>'),
(re.compile(r'\("'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'"\)'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'&ldquo;'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'&rdquo;'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'>\''), lambda match: '>&lsquo;'),
(re.compile(r' \''), lambda match: ' &lsquo;'),
(re.compile(r' &quot;'), lambda match: ' &laquo;&nbsp;'),
(re.compile(r'>&quot;'), lambda match: '>&laquo;&nbsp;'),
(re.compile(r'&quot;<'), lambda match: '&nbsp;&raquo;<'),
(re.compile(r'&quot; '), lambda match: '&nbsp;&raquo; '),
(re.compile(r'&quot;,'), lambda match: '&nbsp;&raquo;,'),
(re.compile(r'\''), lambda match: '&rsquo;'),
(re.compile(r'"<em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<em>"</em><em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<a href='), lambda match: '&laquo;&nbsp;<a href='),
(re.compile(r'</em>"'), lambda match: '&nbsp;&raquo;</em>'),
(re.compile(r'</a>"'), lambda match: '&nbsp;&raquo;</a>'),
(re.compile(r'"</'), lambda match: '&nbsp;&raquo;</'),
(re.compile(r'>"'), lambda match: '>&laquo;&nbsp;'),
(re.compile(r'"<'), lambda match: '&nbsp;&raquo;<'),
(re.compile(r'&rsquo;"'), lambda match: '&rsquo;«&nbsp;'),
(re.compile(r' "'), lambda match: ' &laquo;&nbsp;'),
(re.compile(r'" '), lambda match: '&nbsp;&raquo; '),
(re.compile(r'"\.'), lambda match: '&nbsp;&raquo;.'),
(re.compile(r'",'), lambda match: '&nbsp;&raquo;,'),
(re.compile(r'"\?'), lambda match: '&nbsp;&raquo;?'),
(re.compile(r'":'), lambda match: '&nbsp;&raquo;:'),
(re.compile(r'";'), lambda match: '&nbsp;&raquo;;'),
(re.compile(r'"\!'), lambda match: '&nbsp;&raquo;!'),
(re.compile(r' :'), lambda match: '&nbsp;:'),
(re.compile(r' ;'), lambda match: '&nbsp;;'),
(re.compile(r' \?'), lambda match: '&nbsp;?'),
(re.compile(r' \!'), lambda match: '&nbsp;!'),
(re.compile(r'\s»'), lambda match: '&nbsp;»'),
(re.compile(r'«\s'), lambda match: '«&nbsp;'),
(re.compile(r' %'), lambda match: '&nbsp;%'),
(re.compile(r'\.jpg&nbsp;&raquo; width='), lambda match: '.jpg'),
(re.compile(r'\.png&nbsp;&raquo; width='), lambda match: '.png'),
(re.compile(r' &ndash; '), lambda match: '&nbsp;&ndash; '),
(re.compile(r'figcaption style="display:none"'), lambda match: 'figcaption'),
(re.compile(r' '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' - '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' -,'), lambda match: '&nbsp;&ndash;,'),
(re.compile(r'&raquo;:'), lambda match: '&raquo;&nbsp;:'),
]
# Tag filters for the downloaded article pages.
# NOTE(review): presumably consumed by the BasicNewsRecipe base class, which
# is not visible here -- confirm the exact matching semantics there.

# Tags matching these specs are the ones to keep (a <div class="global">).
keep_only_tags = [
dict(name='div', attrs={'class':['global']})
]
# Tags matching these specs are to be removed: the
# <div class="bloc_base meme_sujet"> block and <p class="lire"> paragraphs.
remove_tags = [
dict(name='div', attrs={'class':['bloc_base meme_sujet']}),
dict(name='p', attrs={'class':['lire']})
]
# Cut-off marker: the element with id "fb-like".
remove_tags_after = [dict(id='fb-like')]
def get_article_url(self, article):
    """Return the URL to download for a feed entry, or None to skip it.

    The URL is taken from the entry's ``guid`` field. Entries without a
    ``guid`` are skipped instead of raising ``TypeError`` on the
    substring tests. Entries whose URL contains one of the excluded
    path fragments (chats, blogs, videos, sport, portfolios, visuals)
    are skipped as well.
    """
    url = article.get('guid', None)
    if url is None:
        return None
    excluded = ('/chat/', '.blog', '/video/', '/sport/', '/portfolio/',
                '/visuel/')
    if any(fragment in url for fragment in excluded):
        return None
    return url
# def get_article_url(self, article):
# link = article.get('link')
# if 'blog' not in link and ('chat' not in link):
# return link
feeds = [ feeds = [
('A la une', 'http://www.lemonde.fr/rss/une.xml'), ('A la une', 'http://www.lemonde.fr/rss/une.xml'),
@ -66,11 +147,3 @@ class LeMonde(BasicNewsRecipe):
cover_url = link_item.img['src'] cover_url = link_item.img['src']
return cover_url return cover_url
def get_article_url(self, article):
url = article.get('guid', None)
if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
url = None
return url

View File

@ -48,7 +48,7 @@ class Push(Command):
threads = [] threads = []
for host in ( for host in (
r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre', r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
'kovid@leopard_test:calibre', 'kovid@ox:calibre',
r'kovid@win7:/cygdrive/c/Users/kovid/calibre', r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
): ):
rcmd = BASE_RSYNC + EXCLUDES + ['.', host] rcmd = BASE_RSYNC + EXCLUDES + ['.', host]

View File

@ -187,7 +187,7 @@ class ANDROID(USBMS):
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107', 'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855', 'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW', 'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
'KTABLET_PC', 'INGENIC'] 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -195,7 +195,7 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853', 'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD', 'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC', 'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET'] 'FILE-CD_GADGET', 'GT-I9001_CARD']
OSX_MAIN_MEM = 'Android Device Main Memory' OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -205,7 +205,10 @@ class EXTHHeader(object):
@property @property
def kf8_header_index(self): def kf8_header_index(self):
return self.get(121, None) ans = self.get(121, None)
if ans == NULL_INDEX:
ans = None
return ans
def __str__(self): def __str__(self):
ans = ['*'*20 + ' EXTH Header '+ '*'*20] ans = ['*'*20 + ' EXTH Header '+ '*'*20]
@ -467,8 +470,14 @@ class MOBIFile(object):
if mh.file_version >= 8: if mh.file_version >= 8:
self.kf8_type = 'standalone' self.kf8_type = 'standalone'
elif mh.has_exth and mh.exth.kf8_header_index is not None: elif mh.has_exth and mh.exth.kf8_header_index is not None:
self.kf8_type = 'joint'
kf8i = mh.exth.kf8_header_index kf8i = mh.exth.kf8_header_index
try:
rec = self.records[kf8i-1]
except IndexError:
pass
else:
if rec.raw == b'BOUNDARY':
self.kf8_type = 'joint'
mh8 = MOBIHeader(self.records[kf8i], kf8i) mh8 = MOBIHeader(self.records[kf8i], kf8i)
self.mobi8_header = mh8 self.mobi8_header = mh8

View File

@ -7,9 +7,10 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, os import sys, os, imghdr
from calibre.ebooks.mobi.debug.headers import TextRecord from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.utils import read_font_record
class MOBIFile(object): class MOBIFile(object):
@ -30,6 +31,7 @@ class MOBIFile(object):
first_text_record+offset+h8.number_of_text_records])] first_text_record+offset+h8.number_of_text_records])]
self.raw_text = b''.join(r.raw for r in self.text_records) self.raw_text = b''.join(r.raw for r in self.text_records)
self.extract_resources()
def print_header(self, f=sys.stdout): def print_header(self, f=sys.stdout):
print (str(self.mf.palmdb).encode('utf-8'), file=f) print (str(self.mf.palmdb).encode('utf-8'), file=f)
@ -41,6 +43,42 @@ class MOBIFile(object):
print (file=f) print (file=f)
print (str(self.mf.mobi8_header).encode('utf-8'), file=f) print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
def extract_resources(self):
    """Populate ``self.resource_map`` from ``self.resource_records``.

    Each entry is a ``(relative_path, payload)`` tuple. The path has the
    form ``<prefix>/<index><suffix>.<ext>`` where the prefix is one of
    ``fonts``, ``images`` or ``binary``. HUFF, CDIC and INDX records are
    skipped.

    Raises ValueError when a FONT record cannot be read.
    """
    eof_sig = b'\xe9\x8e\r\n'
    skipped = {b'HUFF', b'CDIC', b'INDX'}
    known_types = {b'FLIS', b'FCIS', b'SRCS',
            b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
            b'AUDI', b'VIDE'}

    self.resource_map = []
    for i, rec in enumerate(self.resource_records):
        sig = rec.raw[:4]
        if sig in skipped:
            continue
        # TODO: Ignore CNCX records as well

        # Defaults describe an opaque binary record.
        prefix, ext, suffix, payload = 'binary', 'dat', '', rec.raw

        if sig == b'FONT':
            font = read_font_record(rec.raw)
            if font['err']:
                raise ValueError('Failed to read font record: %s Headers: %s'%(
                    font['err'], font['headers']))
            # Use the processed font data when present, else the raw data
            payload = font['font_data'] or font['raw_data']
            prefix, ext = 'fonts', font['ext']
        elif sig in known_types:
            # Recognised non-image record: tag the file name with its type
            if sig == eof_sig:
                suffix = '-EOF'
            else:
                suffix = '-' + sig.decode('ascii')
        else:
            # Unknown signature: see whether it is an image
            kind = imghdr.what(None, rec.raw)
            if kind:
                prefix, ext = 'images', kind

        href = '%s/%06d%s.%s'%(prefix, i, suffix, ext)
        self.resource_map.append((href, payload))
def inspect_mobi(mobi_file, ddir): def inspect_mobi(mobi_file, ddir):
f = MOBIFile(mobi_file) f = MOBIFile(mobi_file)
@ -51,12 +89,14 @@ def inspect_mobi(mobi_file, ddir):
with open(alltext, 'wb') as of: with open(alltext, 'wb') as of:
of.write(f.raw_text) of.write(f.raw_text)
for tdir, attr in [('text_records', 'text_records'), ('images', for x in ('text_records', 'images', 'fonts', 'binary'):
'image_records'), ('binary', 'binary_records'), ('font', os.mkdir(os.path.join(ddir, x))
'font_records')]:
tdir = os.path.join(ddir, tdir) for rec in f.text_records:
os.mkdir(tdir) rec.dump(os.path.join(ddir, 'text_records'))
for rec in getattr(f, attr, []):
rec.dump(tdir) for href, payload in f.resource_map:
with open(os.path.join(ddir, href), 'wb') as f:
f.write(payload)

View File

@ -11,7 +11,7 @@ import struct, re, os
from calibre import replace_entities from calibre import replace_entities
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, check_isbn
from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
NULL_INDEX = 0xffffffff NULL_INDEX = 0xffffffff
@ -75,10 +75,14 @@ class EXTHHeader(object): # {{{
self.mi.author_sort = au.strip() self.mi.author_sort = au.strip()
elif idx == 101: elif idx == 101:
self.mi.publisher = content.decode(codec, 'ignore').strip() self.mi.publisher = content.decode(codec, 'ignore').strip()
if self.mi.publisher in {'Unknown', _('Unknown')}:
self.mi.publisher = None
elif idx == 103: elif idx == 103:
self.mi.comments = content.decode(codec, 'ignore') self.mi.comments = content.decode(codec, 'ignore')
elif idx == 104: elif idx == 104:
self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '') raw = check_isbn(content.decode(codec, 'ignore').strip().replace('-', ''))
if raw:
self.mi.isbn = raw
elif idx == 105: elif idx == 105:
if not self.mi.tags: if not self.mi.tags:
self.mi.tags = [] self.mi.tags = []
@ -92,12 +96,24 @@ class EXTHHeader(object): # {{{
pass pass
elif idx == 108: elif idx == 108:
self.mi.book_producer = content.decode(codec, 'ignore').strip() self.mi.book_producer = content.decode(codec, 'ignore').strip()
elif idx == 112: # dc:source set in some EBSP amazon samples
try:
content = content.decode(codec).strip()
isig = 'urn:isbn:'
if content.lower().startswith(isig):
raw = check_isbn(content[len(isig):])
if raw and not self.mi.isbn:
self.mi.isbn = raw
except:
pass
elif idx == 113: elif idx == 113:
pass # ASIN or UUID pass # ASIN or UUID
elif idx == 116: elif idx == 116:
self.start_offset, = struct.unpack(b'>L', content) self.start_offset, = struct.unpack(b'>L', content)
elif idx == 121: elif idx == 121:
self.kf8_header, = struct.unpack(b'>L', content) self.kf8_header, = struct.unpack(b'>L', content)
if self.kf8_header == NULL_INDEX:
self.kf8_header = None
#else: #else:
# print 'unhandled metadata record', idx, repr(content) # print 'unhandled metadata record', idx, repr(content)
# }}} # }}}

View File

@ -39,10 +39,41 @@ def parse_indx_header(data):
words = ( words = (
'len', 'nul1', 'type', 'gen', 'start', 'count', 'code', 'len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
'lng', 'total', 'ordt', 'ligt', 'nligt', 'ncncx' 'lng', 'total', 'ordt', 'ligt', 'nligt', 'ncncx'
) ) + tuple('unknown%d'%i for i in xrange(27)) + ('ocnt', 'oentries',
'ordt1', 'ordt2', 'tagx')
num = len(words) num = len(words)
values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)]) values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)])
return dict(zip(words, values)) ans = dict(zip(words, values))
ordt1, ordt2 = ans['ordt1'], ans['ordt2']
ans['ordt1_raw'], ans['ordt2_raw'] = [], []
ans['ordt_map'] = ''
if ordt1 > 0 and data[ordt1:ordt1+4] == b'ORDT':
# I dont know what this is, but using it seems to be unnecessary, so
# just leave it as the raw bytestring
ans['ordt1_raw'] = data[ordt1+4:ordt1+4+ans['oentries']]
if ordt2 > 0 and data[ordt2:ordt2+4] == b'ORDT':
ans['ordt2_raw'] = raw = bytearray(data[ordt2+4:ordt2+4+2*ans['oentries']])
if ans['code'] == 65002:
# This appears to be EBCDIC-UTF (65002) encoded. I can't be
# bothered to write a decoder for this (see
# http://www.unicode.org/reports/tr16/) Just how stupid is Amazon?
# Instead, we use a weird hack that seems to do the trick for all
# the books with this type of ORDT record that I have come across.
# Some EBSP book samples in KF8 format from Amazon have this type
# of encoding.
# Basically we try to interpret every second byte as a printable
# ascii character. If we cannot, we map to the ? char.
parsed = bytearray(ans['oentries'])
for i in xrange(0, 2*ans['oentries'], 2):
parsed[i//2] = raw[i+1] if 0x20 < raw[i+1] < 0x7f else ord(b'?')
ans['ordt_map'] = bytes(parsed).decode('ascii')
else:
ans['ordt_map'] = '?'*ans['oentries']
return ans
class CNCX(object): # {{{ class CNCX(object): # {{{
@ -163,7 +194,7 @@ def get_tag_map(control_byte_count, tagx, data, strict=False):
return ans return ans
def parse_index_record(table, data, control_byte_count, tags, codec, def parse_index_record(table, data, control_byte_count, tags, codec,
strict=False): ordt_map, strict=False):
header = parse_indx_header(data) header = parse_indx_header(data)
idxt_pos = header['start'] idxt_pos = header['start']
if data[idxt_pos:idxt_pos+4] != b'IDXT': if data[idxt_pos:idxt_pos+4] != b'IDXT':
@ -184,12 +215,11 @@ def parse_index_record(table, data, control_byte_count, tags, codec,
for j in xrange(entry_count): for j in xrange(entry_count):
start, end = idx_positions[j:j+2] start, end = idx_positions[j:j+2]
rec = data[start:end] rec = data[start:end]
ident, consumed = decode_string(rec, codec=codec) ident, consumed = decode_string(rec, codec=codec, ordt_map=ordt_map)
rec = rec[consumed:] rec = rec[consumed:]
tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict) tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
table[ident] = tag_map table[ident] = tag_map
def read_index(sections, idx, codec): def read_index(sections, idx, codec):
table, cncx = OrderedDict(), CNCX([], codec) table, cncx = OrderedDict(), CNCX([], codec)
@ -203,12 +233,13 @@ def read_index(sections, idx, codec):
cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]] cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
cncx = CNCX(cncx_records, codec) cncx = CNCX(cncx_records, codec)
tag_section_start = indx_header['len'] tag_section_start = indx_header['tagx']
control_byte_count, tags = parse_tagx_section(data[tag_section_start:]) control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
for i in xrange(idx + 1, idx + 1 + indx_count): for i in xrange(idx + 1, idx + 1 + indx_count):
# Index record # Index record
data = sections[i][0] data = sections[i][0]
parse_index_record(table, data, control_byte_count, tags, codec) parse_index_record(table, data, control_byte_count, tags, codec,
indx_header['ordt_map'])
return table, cncx return table, cncx

View File

@ -285,7 +285,11 @@ class Mobi8Reader(object):
def create_guide(self): def create_guide(self):
guide = Guide() guide = Guide()
for ref_type, ref_title, fileno in self.guide: for ref_type, ref_title, fileno in self.guide:
try:
elem = self.elems[fileno] elem = self.elems[fileno]
except IndexError:
# Happens for thumbnailstandard in Amazon book samples
continue
fi = self.get_file_info(elem.insert_pos) fi = self.get_file_info(elem.insert_pos)
idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec) idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec)
linktgt = fi.filename linktgt = fi.filename

View File

@ -15,10 +15,12 @@ from calibre.ebooks import normalize
IMAGE_MAX_SIZE = 10 * 1024 * 1024 IMAGE_MAX_SIZE = 10 * 1024 * 1024
def decode_string(raw, codec='utf-8'): def decode_string(raw, codec='utf-8', ordt_map=''):
length, = struct.unpack(b'>B', raw[0]) length, = struct.unpack(b'>B', raw[0])
raw = raw[1:1+length] raw = raw[1:1+length]
consumed = length+1 consumed = length+1
if ordt_map:
return ''.join(ordt_map[ord(x)] for x in raw), consumed
return raw.decode(codec), consumed return raw.decode(codec), consumed
def decode_hex_number(raw, codec='utf-8'): def decode_hex_number(raw, codec='utf-8'):

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre import guess_type
class EntityDeclarationProcessor(object): # {{{

    """Expand entities declared in the document itself.

    Every ``<!ENTITY name "value">`` declaration found in ``html`` is
    recorded in ``declared_entities`` (name -> value, with double quotes
    removed). ``processed_html`` is ``html`` with each ``&name;``
    reference replaced by its declared value. The declarations
    themselves are left in the text.
    """

    def __init__(self, html):
        self.declared_entities = {}
        for match in re.finditer(r'<!\s*ENTITY\s+([^>]+)>', html):
            # Split off only the entity name. The remainder is the
            # replacement text, which may itself contain whitespace and
            # must not be truncated at its first word.
            tokens = match.group(1).split(None, 1)
            if len(tokens) > 1:
                name, value = tokens
                self.declared_entities[name.strip()] = value.strip().replace('"', '')
        self.processed_html = html
        for key, val in self.declared_entities.items():
            self.processed_html = self.processed_html.replace('&%s;'%key, val)
# }}}
def self_closing_sub(match):
    """Regex substitution callback that expands a self-closing tag.

    ``match`` must expose the tag name as group 1 and the attribute text
    as group 2. ``<br .../>`` is returned unchanged; any other tag
    ``<t attrs/>`` becomes ``<t attrs></t>``.
    """
    tag, attrs = match.group(1), match.group(2)
    is_br = tag.lower().strip() == 'br'
    if is_br:
        return match.group()
    return '<%s %s></%s>'%(tag, attrs, tag)
def load_html(path, view, codec='utf-8', mime_type=None,
        pre_load_callback=lambda x:None):
    """Read the HTML file at ``path``, clean it up and load it into ``view``.

    :param path: Path of the file to load.
    :param view: Object providing ``setContent`` and ``setHtml``.
    :param codec: Encoding used to decode the file (undecodable bytes are
        replaced) and to re-encode it when ``setContent`` is used.
    :param mime_type: MIME type of the file; guessed from ``path`` when
        None.
    :param pre_load_callback: Called with the ``QUrl`` of the file just
        before the content is handed to ``view``.

    The clean-up expands inline entity declarations, expands self-closing
    tags for XHTML content and removes empty ``<title/>`` elements.
    """
    from PyQt4.Qt import QUrl, QByteArray

    if mime_type is None:
        # NOTE(review): guess_type presumably returns None as the type for
        # unrecognised file names, like mimetypes.guess_type -- confirm.
        # Fall back to plain HTML so the substring test below does not
        # raise TypeError.
        mime_type = guess_type(path)[0] or 'text/html'

    with open(path, 'rb') as f:
        html = f.read().decode(codec, 'replace')

    html = EntityDeclarationProcessor(html).processed_html
    has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None

    if 'xhtml' in mime_type:
        self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>',
                re.IGNORECASE)
        html = self_closing_pat.sub(self_closing_sub, html)

    # The module enables unicode_literals, so these plain literals are
    # unicode strings.
    html = re.sub(r'<\s*title\s*/\s*>', '', html, flags=re.IGNORECASE)
    loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)

    if has_svg:
        # Documents containing SVG go through setContent with an explicit
        # MIME type; everything else goes through setHtml.
        view.setContent(QByteArray(html.encode(codec)), mime_type,
                loading_url)
    else:
        view.setHtml(html, loading_url)

View File

@ -18,10 +18,11 @@ from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre import __appname__, __version__, fit_image from calibre import __appname__, __version__, fit_image
from calibre.ebooks.oeb.display.webview import load_html
from PyQt4 import QtCore from PyQt4 import QtCore
from PyQt4.Qt import QUrl, QEventLoop, QObject, \ from PyQt4.Qt import (QEventLoop, QObject,
QPrinter, QMetaObject, QSizeF, Qt, QPainter, QPixmap QPrinter, QMetaObject, QSizeF, Qt, QPainter, QPixmap)
from PyQt4.QtWebKit import QWebView from PyQt4.QtWebKit import QWebView
from pyPdf import PdfFileWriter, PdfFileReader from pyPdf import PdfFileWriter, PdfFileReader
@ -70,7 +71,7 @@ def get_pdf_printer(opts, for_comic=False):
opts.margin_right, opts.margin_bottom, QPrinter.Point) opts.margin_right, opts.margin_bottom, QPrinter.Point)
printer.setOrientation(orientation(opts.orientation)) printer.setOrientation(orientation(opts.orientation))
printer.setOutputFormat(QPrinter.PdfFormat) printer.setOutputFormat(QPrinter.PdfFormat)
printer.setFullPage(True) printer.setFullPage(for_comic)
return printer return printer
def get_printer_page_size(opts, for_comic=False): def get_printer_page_size(opts, for_comic=False):
@ -156,8 +157,7 @@ class PDFWriter(QObject): # {{{
self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1))) self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1)))
self.logger.debug('Processing %s...' % item) self.logger.debug('Processing %s...' % item)
load_html(item, self.view)
self.view.load(QUrl.fromLocalFile(item))
def _render_html(self, ok): def _render_html(self, ok):
if ok: if ok:
@ -171,6 +171,10 @@ class PDFWriter(QObject): # {{{
# previously set on the printer. # previously set on the printer.
if isosx: if isosx:
printer.setOutputFormat(QPrinter.NativeFormat) printer.setOutputFormat(QPrinter.NativeFormat)
self.view.page().mainFrame().evaluateJavaScript('''
document.body.style.backgroundColor = "white";
''')
self.view.print_(printer) self.view.print_(printer)
printer.abort() printer.abort()
else: else:

View File

@ -9,8 +9,8 @@ __docformat__ = 'restructuredtext en'
__license__ = 'GPL v3' __license__ = 'GPL v3'
from PyQt4.Qt import QDialog, QVBoxLayout, QLabel, QDialogButtonBox, \ from PyQt4.Qt import (QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
QListWidget, QAbstractItemView QListWidget, QAbstractItemView)
from PyQt4 import QtGui from PyQt4 import QtGui
class ChoosePluginToolbarsDialog(QDialog): class ChoosePluginToolbarsDialog(QDialog):
@ -39,6 +39,9 @@ class ChoosePluginToolbarsDialog(QDialog):
self._locations_list.setSizePolicy(sizePolicy) self._locations_list.setSizePolicy(sizePolicy)
for key, text in locations: for key, text in locations:
self._locations_list.addItem(text) self._locations_list.addItem(text)
if key in {'toolbar', 'toolbar-device'}:
self._locations_list.item(self._locations_list.count()-1
).setSelected(True)
self._layout.addWidget(self._locations_list) self._layout.addWidget(self._locations_list)
self._footer_label = QLabel( self._footer_label = QLabel(

View File

@ -11,9 +11,9 @@ from datetime import timedelta
import calendar, textwrap import calendar, textwrap
from collections import OrderedDict from collections import OrderedDict
from PyQt4.Qt import QDialog, Qt, QTime, QObject, QMenu, QHBoxLayout, \ from PyQt4.Qt import (QDialog, Qt, QTime, QObject, QMenu, QHBoxLayout,
QAction, QIcon, QMutex, QTimer, pyqtSignal, QWidget, QGridLayout, \ QAction, QIcon, QMutex, QTimer, pyqtSignal, QWidget, QGridLayout,
QCheckBox, QTimeEdit, QLabel, QLineEdit, QDoubleSpinBox QCheckBox, QTimeEdit, QLabel, QLineEdit, QDoubleSpinBox)
from calibre.gui2.dialogs.scheduler_ui import Ui_Dialog from calibre.gui2.dialogs.scheduler_ui import Ui_Dialog
from calibre.gui2 import config as gconf, error_dialog from calibre.gui2 import config as gconf, error_dialog
@ -317,6 +317,8 @@ class SchedulerDialog(QDialog, Ui_Dialog):
return False return False
if un or pw: if un or pw:
self.recipe_model.set_account_info(urn, un, pw) self.recipe_model.set_account_info(urn, un, pw)
else:
self.recipe_model.clear_account_info(urn)
if self.schedule.isChecked(): if self.schedule.isChecked():
schedule_type, schedule = \ schedule_type, schedule = \

View File

@ -151,7 +151,7 @@ class UpdateMixin(object):
plt = u'' plt = u''
if has_plugin_updates: if has_plugin_updates:
plt = _(' (%d plugin updates)')%plugin_updates plt = _(' (%d plugin updates)')%plugin_updates
msg = (u'<span style="color:red; font-weight: bold">%s: ' msg = (u'<span style="color:green; font-weight: bold">%s: '
u'<a href="update:%s">%s%s</a></span>') % ( u'<a href="update:%s">%s%s</a></span>') % (
_('Update found'), version, calibre_version, plt) _('Update found'), version, calibre_version, plt)
else: else:

View File

@ -4,14 +4,14 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
# Imports {{{ # Imports {{{
import os, math, re, glob, sys, zipfile import os, math, glob, sys, zipfile
from base64 import b64encode from base64 import b64encode
from functools import partial from functools import partial
from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt,
QPainter, QPalette, QBrush, QFontDatabase, QDialog, QPainter, QPalette, QBrush, QFontDatabase, QDialog,
QColor, QPoint, QImage, QRegion, QVariant, QIcon, QColor, QPoint, QImage, QRegion, QVariant, QIcon,
QFont, pyqtSignature, QAction, QByteArray, QMenu, QFont, pyqtSignature, QAction, QMenu,
pyqtSignal, QSwipeGesture, QApplication) pyqtSignal, QSwipeGesture, QApplication)
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
@ -21,10 +21,11 @@ from calibre.gui2.viewer.config_ui import Ui_Dialog
from calibre.gui2.viewer.flip import SlideFlip from calibre.gui2.viewer.flip import SlideFlip
from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre import prints, guess_type from calibre import prints
from calibre.gui2.viewer.keys import SHORTCUTS from calibre.gui2.viewer.keys import SHORTCUTS
from calibre.gui2.viewer.javascript import JavaScriptLoader from calibre.gui2.viewer.javascript import JavaScriptLoader
from calibre.gui2.viewer.position import PagePosition from calibre.gui2.viewer.position import PagePosition
from calibre.ebooks.oeb.display.webview import load_html
# }}} # }}}
@ -474,19 +475,6 @@ class Document(QWebPage): # {{{
# }}} # }}}
class EntityDeclarationProcessor(object): # {{{
def __init__(self, html):
self.declared_entities = {}
for match in re.finditer(r'<!\s*ENTITY\s+([^>]+)>', html):
tokens = match.group(1).split()
if len(tokens) > 1:
self.declared_entities[tokens[0].strip()] = tokens[1].strip().replace('"', '')
self.processed_html = html
for key, val in self.declared_entities.iteritems():
self.processed_html = self.processed_html.replace('&%s;'%key, val)
# }}}
class DocumentView(QWebView): # {{{ class DocumentView(QWebView): # {{{
magnification_changed = pyqtSignal(object) magnification_changed = pyqtSignal(object)
@ -497,8 +485,6 @@ class DocumentView(QWebView): # {{{
self.is_auto_repeat_event = False self.is_auto_repeat_event = False
self.debug_javascript = debug_javascript self.debug_javascript = debug_javascript
self.shortcuts = Shortcuts(SHORTCUTS, 'shortcuts/viewer') self.shortcuts = Shortcuts(SHORTCUTS, 'shortcuts/viewer')
self.self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>',
re.IGNORECASE)
self.setSizePolicy(QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)) self.setSizePolicy(QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding))
self._size_hint = QSize(510, 680) self._size_hint = QSize(510, 680)
self.initial_pos = 0.0 self.initial_pos = 0.0
@ -689,31 +675,16 @@ class DocumentView(QWebView): # {{{
def path(self): def path(self):
return os.path.abspath(unicode(self.url().toLocalFile())) return os.path.abspath(unicode(self.url().toLocalFile()))
def self_closing_sub(self, match):
tag = match.group(1)
if tag.lower().strip() == 'br':
return match.group()
return '<%s %s></%s>'%(match.group(1), match.group(2), match.group(1))
def load_path(self, path, pos=0.0): def load_path(self, path, pos=0.0):
self.initial_pos = pos self.initial_pos = pos
mt = getattr(path, 'mime_type', None)
if mt is None:
mt = guess_type(path)[0]
html = open(path, 'rb').read().decode(path.encoding, 'replace')
html = EntityDeclarationProcessor(html).processed_html
has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None
if 'xhtml' in mt: def callback(lu):
html = self.self_closing_pat.sub(self.self_closing_sub, html) self.loading_url = lu
if self.manager is not None: if self.manager is not None:
self.manager.load_started() self.manager.load_started()
self.loading_url = QUrl.fromLocalFile(path)
html = re.sub(ur'<\s*title\s*/\s*>', u'', html, flags=re.IGNORECASE) load_html(path, self, codec=path.encoding, mime_type=getattr(path,
if has_svg: 'mime_type', None), pre_load_callback=callback)
self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path))
else:
self.setHtml(html, self.loading_url)
self.turn_off_internal_scrollbars() self.turn_off_internal_scrollbars()
def initialize_scrollbar(self): def initialize_scrollbar(self):

View File

@ -27,6 +27,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.customize.ui import available_input_formats from calibre.customize.ui import available_input_formats
from calibre.gui2.viewer.dictionary import Lookup from calibre.gui2.viewer.dictionary import Lookup
from calibre import as_unicode, force_unicode, isbytestring from calibre import as_unicode, force_unicode, isbytestring
from calibre.ptempfile import reset_base_dir
vprefs = JSONConfig('viewer') vprefs = JSONConfig('viewer')
@ -947,6 +948,7 @@ View an ebook.
def main(args=sys.argv): def main(args=sys.argv):
# Ensure viewer can continue to function if GUI is closed # Ensure viewer can continue to function if GUI is closed
os.environ.pop('CALIBRE_WORKER_TEMP_DIR', None) os.environ.pop('CALIBRE_WORKER_TEMP_DIR', None)
reset_base_dir()
parser = option_parser() parser = option_parser()
opts, args = parser.parse_args(args) opts, args = parser.parse_args(args)

View File

@ -233,7 +233,7 @@ def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle,
if not mi.authors: if not mi.authors:
mi.authors = [_('Unknown')] mi.authors = [_('Unknown')]
for x in ('title', 'authors', 'isbn', 'tags', 'series'): for x in ('title', 'authors', 'isbn', 'tags', 'series'):
val = locals()[x] val = locals()['o'+x]
if val: setattr(mi, x[1:], val) if val: setattr(mi, x[1:], val)
if oseries: if oseries:
mi.series_index = oseries_index mi.series_index = oseries_index
@ -356,7 +356,7 @@ def command_add(args, dbpath):
print >>sys.stderr, _('You must specify at least one file to add') print >>sys.stderr, _('You must specify at least one file to add')
return 1 return 1
do_add(get_db(dbpath, opts), args[1:], opts.one_book_per_directory, do_add(get_db(dbpath, opts), args[1:], opts.one_book_per_directory,
opts.recurse, opts.duplicates, opts.title, opts.author, opts.isbn, opts.recurse, opts.duplicates, opts.title, opts.authors, opts.isbn,
tags, opts.series, opts.series_index) tags, opts.series, opts.series_index)
return 0 return 0

View File

@ -40,6 +40,46 @@ entry_points = {
], ],
} }
class PreserveMIMEDefaults(object):
    '''
    Context manager that snapshots every ``applications/defaults.list``
    file found under the XDG data directories on entry and puts them back
    on exit.

    On exit, a file that did not exist on entry is removed, and a file
    whose contents changed is rewritten with the bytes read on entry.
    Restoration is best-effort: files that cannot be removed or that are
    not writable by the current user are left as they are.
    '''

    def __init__(self):
        # Maps defaults.list path -> original bytes, or None when the file
        # did not exist on entry.
        self.initial_values = {}

    def __enter__(self):
        import errno

        def_data_dirs = '/usr/local/share:/usr/share'
        paths = os.environ.get('XDG_DATA_DIRS', def_data_dirs)
        paths = paths.split(':')
        paths.append(os.environ.get('XDG_DATA_HOME', os.path.expanduser(
            '~/.local/share')))
        paths = list(filter(os.path.isdir, paths))
        if not paths:
            # Env var had garbage in it, ignore it
            paths = list(filter(os.path.isdir, def_data_dirs.split(':')))
        self.paths = {os.path.join(x, 'applications/defaults.list') for x in
                paths}
        self.initial_values = {}
        for x in self.paths:
            try:
                with open(x, 'rb') as f:
                    self.initial_values[x] = f.read()
            except EnvironmentError as e:
                # Only a genuinely missing file is recorded as "absent".
                # A file that exists but could not be read is not tracked
                # at all, so that __exit__ never deletes it.
                if e.errno == errno.ENOENT:
                    self.initial_values[x] = None
        return self

    def __exit__(self, *args):
        import errno

        for path, val in self.initial_values.items():
            if val is None:
                # File was created while the block ran: remove it again.
                try:
                    os.remove(path)
                except EnvironmentError:
                    # Never created, or not removable: nothing to undo.
                    pass
            elif os.path.exists(path):
                try:
                    with open(path, 'r+b') as f:
                        if f.read() != val:
                            f.seek(0)
                            f.truncate()
                            f.write(val)
                except EnvironmentError as e:
                    # A file we are not allowed to write cannot have been
                    # changed on our behalf either, so skip it instead of
                    # failing the whole block.
                    if e.errno != errno.EACCES:
                        raise
# Uninstall script {{{ # Uninstall script {{{
UNINSTALL = '''\ UNINSTALL = '''\
#!{python} #!{python}
@ -333,12 +373,10 @@ class PostInstall:
def setup_desktop_integration(self): # {{{ def setup_desktop_integration(self): # {{{
try: try:
self.info('Setting up desktop integration...') self.info('Setting up desktop integration...')
with TemporaryDirectory() as tdir, CurrentDir(tdir), \
with TemporaryDirectory() as tdir: PreserveMIMEDefaults():
with CurrentDir(tdir):
render_img('mimetypes/lrf.png', 'calibre-lrf.png') render_img('mimetypes/lrf.png', 'calibre-lrf.png')
check_call('xdg-icon-resource install --noupdate --context mimetypes --size 128 calibre-lrf.png application-lrf', shell=True) check_call('xdg-icon-resource install --noupdate --context mimetypes --size 128 calibre-lrf.png application-lrf', shell=True)
self.icon_resources.append(('mimetypes', 'application-lrf', '128')) self.icon_resources.append(('mimetypes', 'application-lrf', '128'))

View File

@ -74,6 +74,11 @@ def base_dir():
return _base_dir return _base_dir
def reset_base_dir():
    '''
    Drop the module-level ``_base_dir`` value and call :func:`base_dir`
    again.
    '''
    global _base_dir
    # Clear the stored value first, then invoke base_dir().
    # NOTE(review): presumably base_dir() recomputes and re-caches the
    # directory when _base_dir is None -- confirm against base_dir().
    _base_dir = None
    base_dir()
def force_unicode(x): def force_unicode(x):
# Cannot use the implementation in calibre.__init__ as it causes a circular # Cannot use the implementation in calibre.__init__ as it causes a circular
# dependency # dependency

View File

@ -437,6 +437,14 @@ class SchedulerConfig(object):
if x.get('id', False) == urn: if x.get('id', False) == urn:
return x.get('username', ''), x.get('password', '') return x.get('username', ''), x.get('password', '')
def clear_account_info(self, urn):
    '''
    Remove the first account entry whose ``id`` equals ``urn`` and write
    the scheduler file. Does nothing when no entry matches.
    '''
    with self.lock:
        # Find the first matching entry; stop iterating as soon as it
        # is found.
        account = next(
            (entry for entry in self.iter_accounts()
                if entry.get('id', False) == urn),
            None)
        # Compare against None explicitly rather than relying on the
        # truthiness of the entry object.
        if account is not None:
            account.getparent().remove(account)
            self.write_scheduler_file()
def get_customize_info(self, urn): def get_customize_info(self, urn):
keep_issues = 0 keep_issues = 0
add_title_tag = True add_title_tag = True

View File

@ -354,6 +354,9 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
def set_account_info(self, urn, un, pw): def set_account_info(self, urn, un, pw):
self.scheduler_config.set_account_info(urn, un, pw) self.scheduler_config.set_account_info(urn, un, pw)
def clear_account_info(self, urn):
    '''
    Forward the request to clear the account information for ``urn`` to
    the scheduler configuration.
    '''
    config = self.scheduler_config
    config.clear_account_info(urn)
def get_account_info(self, urn): def get_account_info(self, urn):
return self.scheduler_config.get_account_info(urn) return self.scheduler_config.get_account_info(urn)