revisions for annotations

This commit is contained in:
GRiker 2010-03-05 10:48:49 -08:00
commit b86b7a9a80
24 changed files with 801 additions and 226 deletions

View File

@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
class APOD(BasicNewsRecipe):
    '''
    Recipe that downloads the "Astronomy Picture of the Day" RSS feed
    and trims each fetched page in :meth:`postprocess_html`.
    '''

    title = u'Astronomy Picture of the Day'
    __author__ = 'Starson17'
    description = 'Astronomy Pictures'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    cover_url = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
    remove_javascript = True
    recursions = 0
    oldest_article = 14

    feeds = [
        (u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
    ]

    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    def postprocess_html(self, soup, first_fetch):
        '''
        Remove the last ``<center>`` element, the first ``<p>`` and the last
        two ``<p>`` elements from the page, then return the modified soup.

        NOTE(review): presumably these elements are the page's navigation and
        credits -- confirm against a live page.

        :param soup: parsed page, modified in place
        :param first_fetch: unused here
        :return: the same ``soup`` object
        '''
        # Collect both lists before extracting anything, so removing the
        # <center> element does not change which paragraphs are selected.
        center_tags = soup.findAll(['center'])
        p_tags = soup.findAll(['p'])

        # A page without any <center> element used to raise IndexError here.
        if center_tags:
            center_tags[-1].extract()

        # Slices are safe on short lists; with fewer than three paragraphs the
        # two slices overlap and a tag may simply be extracted twice.
        for tag in p_tags[:1]:
            tag.extract()
        for tag in p_tags[-2:]:
            tag.extract()
        return soup

View File

@ -5,6 +5,7 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
arstechnica.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
@ -20,7 +21,7 @@ class ArsTechnica2(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
extra_css = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
extra_css = ' body {font-family: Arial,Helvetica,sans-serif} .title{text-align: left} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
conversion_options = {
'comments' : description
@ -30,6 +31,10 @@ class ArsTechnica2(BasicNewsRecipe):
}
preprocess_regexps = [
(re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
]
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
@ -37,7 +42,7 @@ class ArsTechnica2(BasicNewsRecipe):
dict(name=['object','link','embed'])
,dict(name='div', attrs={'class':'read-more-link'})
]
remove_attributes=['width','height']
feeds = [
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
@ -90,3 +95,5 @@ class ArsTechnica2(BasicNewsRecipe):
return soup
def get_article_url(self, article):
    '''
    Return the feed entry's ``guid`` with everything from the last ``?``
    onwards removed.

    :param article: feed entry supporting ``.get('guid', default)``
    :return: the trimmed URL; the guid unchanged when it contains no ``?``;
             ``None`` when the entry has no guid
    '''
    guid = article.get('guid', None)
    if guid is None:
        # Previously this crashed with AttributeError on None.
        return None
    base, sep, _query = guid.rpartition('?')
    # rpartition() puts the whole string in the LAST slot when the separator
    # is absent, so blindly taking [0] turned query-less URLs into ''.
    return base if sep else guid

View File

@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe
# Recipe for the Journal of Hospital Medicine as served from
# www3.interscience.wiley.com. It logs in with the user's credentials
# (needs_subscription), builds the section/article list by scraping the
# journal home page, and rewrites image URLs on each article page.
# NOTE(review): leading indentation is missing in this view of the file, so
# the nesting of a few statements in parse_index cannot be confirmed from here.
class JournalofHospitalMedicine(BasicNewsRecipe):
title = 'Journal of Hospital Medicine'
__author__ = 'Krittika Goyal'
description = 'Medical news'
timefmt = ' [%d %b, %Y]'
needs_subscription = True
no_stylesheets = True
#remove_tags_before = dict(name='div', attrs={'align':'center'})
#remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
# Elements stripped from every downloaded article page.
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':'subContent'}),
dict(name='div', attrs={'id':['contentFrame']}),
#dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
#dict(name='table', attrs={'align':'RIGHT'}),
]
# Log in: open the Wiley home page, fill in and submit the 'siteLogin' form
# with self.username / self.password, and return the resulting browser.
# Raises Exception when the response body still contains 'userName = ""'
# (presumably the marker the site emits for a session that is not logged in
# -- confirm against the live page).
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open('http://www3.interscience.wiley.com/cgi-bin/home')
br.select_form(name='siteLogin')
br['LoginName'] = self.username
br['Password'] = self.password
response = br.submit()
raw = response.read()
if 'userName = ""' in raw:
raise Exception('Login failed. Check your username and password')
return br
# Fetch the journal home page (which carries the table of contents) as a soup.
def johm_get_index(self):
return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
# Parse the article table of contents.
# Walks every tag below the element with id 'contentCell':
#   * an <h4> closes the section collected so far and starts a new one;
#   * a <strong> seen after the first section heading is an article title; its
#     link is the first <a> under the tag's grandparent whose href contains
#     '/HTMLSTART'.
# Returns a list of (section_title, [article_dict, ...]) tuples, where each
# article dict has the keys 'title', 'url', 'description' and 'date'.
def parse_index(self):
parse_soup = self.johm_get_index()
div = parse_soup.find(id='contentCell')
current_section = None
current_articles = []
feeds = []
for x in div.findAll(True):
if x.name == 'h4':
# Section heading found
if current_articles and current_section:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if current_section is not None and x.name == 'strong':
title = self.tag_to_string(x)
# The lambda's x is the href attribute value, not the loop variable x.
p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
if p is None:
continue
url = p.get('href', False)
if not url or not title:
continue
# Site-relative links are made absolute, and the '/HTMLSTART' suffix is
# swapped for '/main.html,ftx_abs'.
# NOTE(review): whether the replace() below runs only for relative URLs or
# for every URL depends on indentation that is not visible here -- confirm.
if url.startswith('/'):
url = 'http://www3.interscience.wiley.com'+url
url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
#if url.startswith('/'):
#url = 'http://online.wsj.com'+url
current_articles.append({'title': title, 'url':url,
'description':'', 'date':''})
# Flush the section that was still being collected when the walk ended.
if current_articles and current_section:
feeds.append((current_section, current_articles))
return feeds
# Replace 'tfig' with 'nfig' in the src of every <img> that has one
# (presumably swapping thumbnail figures for larger ones -- confirm).
def preprocess_html(self, soup):
for img in soup.findAll('img', src=True):
img['src'] = img['src'].replace('tfig', 'nfig')
return soup

View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class SanFranciscoBayGuardian(BasicNewsRecipe):
    '''
    Feed-driven recipe titled "San Francisco Bay Guardian" that keeps only
    the ``story_body`` div of each article page.
    '''

    title = u'San Francisco Bay Guardian'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    #encoding = 'latin1'
    no_stylesheets = True

    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
    remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
    remove_tags = [
       dict(name='iframe'),
       #dict(name='div', attrs={'class':'related-articles'}),
       dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
       #dict(name='ul', attrs={'class':'article-tools'}),
       dict(name='ul', attrs={'id':'story_tabs'}),
    ]

    # NOTE(review): every feed below is a newsobserver.com URL although the
    # recipe is titled "San Francisco Bay Guardian" -- confirm that these are
    # the intended feeds.
    feeds = [
        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
        ('News', 'http://www.newsobserver.com/102/index.rss'),
        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
        ('Business', 'http://www.newsobserver.com/104/index.rss'),
        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]

    def preprocess_html(self, soup):
        '''
        Replace the page with a minimal document whose body contains only the
        ``<div id="story_body">`` element of the original page.

        :param soup: parsed article page
        :return: the new minimal soup, or the original ``soup`` unchanged when
                 the page has no ``story_body`` div
        '''
        story = soup.find(name='div', attrs={'id':'story_body'})
        if story is None:
            # Inserting None into the new body would crash the download;
            # fall back to the page as fetched.
            return soup
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup

View File

@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class SmithsonianMagazine(BasicNewsRecipe):
    '''
    Feed-driven recipe for Smithsonian Magazine. Follows one level of
    "&page=N" links and keeps only the ``article-left`` div of each page.
    '''

    title = u'Smithsonian Magazine'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 31#days
    max_articles_per_feed = 50
    #encoding = 'latin1'
    recursions = 1
    # Only links ending in &page=2 .. &page=9 are followed.
    match_regexps = ['&page=[2-9]$']

    # Was spelled `remove_stylesheets`; the other recipes in this change set
    # use `no_stylesheets` to drop site CSS, so use the same attribute here.
    no_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after = dict(name='p', attrs={'id':'articlePaginationWrapper'})
    remove_tags = [
       dict(name='iframe'),
       dict(name='div', attrs={'class':'article_sidebar_border'}),
       dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large']}),
       #dict(name='ul', attrs={'class':'article-tools'}),
       dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}),
    ]

    feeds = [
        ('History and Archeology',
         'http://feeds.feedburner.com/smithsonianmag/history-archaeology'),
        ('People and Places',
         'http://feeds.feedburner.com/smithsonianmag/people-places'),
        ('Science and Nature',
         'http://feeds.feedburner.com/smithsonianmag/science-nature'),
        ('Arts and Culture',
         'http://feeds.feedburner.com/smithsonianmag/arts-culture'),
        ('Travel',
         'http://feeds.feedburner.com/smithsonianmag/travel'),
    ]

    def preprocess_html(self, soup):
        '''
        Replace the page with a minimal document whose body contains only the
        ``<div id="article-left">`` element of the original page.

        :param soup: parsed article page
        :return: the new minimal soup, or the original ``soup`` unchanged when
                 the page has no ``article-left`` div
        '''
        story = soup.find(name='div', attrs={'id':'article-left'})
        if story is None:
            # Inserting None into the new body would crash the download;
            # fall back to the page as fetched.
            return soup
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup

    def postprocess_html(self, soup, first):
        '''
        Remove every element with id ``articlePaginationWrapper``; on pages
        other than the first, also remove every element with id
        ``article-head``.

        :param soup: parsed page, modified in place
        :param first: true for the first page of an article
        :return: the same ``soup`` object
        '''
        for p in soup.findAll(id='articlePaginationWrapper'):
            p.extract()
        if not first:
            for div in soup.findAll(id='article-head'):
                div.extract()
        return soup

View File

@ -143,7 +143,7 @@ extensions = [
if iswindows:
extensions.append(Extension('winutil',
['calibre/utils/windows/winutil.c'],
libraries=['shell32', 'setupapi'],
libraries=['shell32', 'setupapi', 'wininet'],
cflags=['/X']
))

View File

@ -6,6 +6,7 @@ the GUI. A device backend must subclass the L{Device} class. See prs500.py for
a backend that implement the Device interface for the SONY PRS500 Reader.
"""
import os
from collections import namedtuple
from calibre.customize import Plugin
from calibre.constants import iswindows
@ -43,6 +44,9 @@ class DevicePlugin(Plugin):
#: Icon for this device
icon = I('reader.svg')
# Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
UserAnnotation = namedtuple('Annotation','type, bookmark')
@classmethod
def get_gui_name(cls):
if hasattr(cls, 'gui_name'):

View File

@ -7,15 +7,12 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Amazon's Kindle
'''
import os, re, sys
from cStringIO import StringIO
import os
import re
from struct import unpack
import sys
from calibre.devices.usbms.driver import USBMS
from calibre.ebooks.metadata.mobi import StreamSlicer
from calibre.utils.logging import Log
class KINDLE(USBMS):
@ -65,6 +62,16 @@ class KINDLE(USBMS):
return mi
def get_annotations(self, path_map):
MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt']
TAN_FORMATS = [u'tpz', u'azw1']
mbp_formats = set()
for fmt in MBP_FORMATS:
mbp_formats.add(fmt)
tan_formats = set()
for fmt in TAN_FORMATS:
tan_formats.add(fmt)
def get_storage():
storage = []
if self._main_prefix:
@ -75,36 +82,49 @@ class KINDLE(USBMS):
storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
return storage
def resolve_mbp_paths(storage, path_map):
def resolve_bookmark_paths(storage, path_map):
pop_list = []
book_ext = {}
for id in path_map:
file_fmts = set()
for fmt in path_map[id]['fmts']:
file_fmts.add(fmt)
bookmark_extension = None
if file_fmts.intersection(mbp_formats):
book_extension = list(file_fmts.intersection(mbp_formats))[0]
bookmark_extension = 'mbp'
elif file_fmts.intersection(tan_formats):
book_extension = list(file_fmts.intersection(tan_formats))[0]
bookmark_extension = 'tan'
if bookmark_extension:
for vol in storage:
#print "path_map[id]: %s" % path_map[id]
mbp_path = path_map[id].replace(os.path.abspath('/<storage>'),vol)
#print "looking for mbp_path: %s" % mbp_path
if os.path.exists(mbp_path):
#print "mbp_path found"
path_map[id] = mbp_path
bkmk_path = path_map[id]['path'].replace(os.path.abspath('/<storage>'),vol)
bkmk_path = bkmk_path.replace('bookmark',bookmark_extension)
if os.path.exists(bkmk_path):
path_map[id] = bkmk_path
book_ext[id] = book_extension
break
else:
#print "mbp_path not found"
pop_list.append(id)
else:
pop_list.append(id)
# Remove non-existent mbp files
# Remove non-existent bookmark templates
for id in pop_list:
path_map.pop(id)
return path_map
return path_map, book_ext
log = Log()
storage = get_storage()
path_map = resolve_mbp_paths(storage, path_map)
path_map, book_ext = resolve_bookmark_paths(storage, path_map)
# path_map is now a mapping of valid mbp files
# Not yet implemented - Topaz annotations
bookmarked_books = {}
MBP_FORMATS = ['azw', 'mobi', 'prc', 'txt']
for id in path_map:
myBookmark = Bookmark(path_map[id], MBP_FORMATS, id)
bookmarked_books[id] = self.UserAnnotation(type='mobi', bookmark=myBookmark)
bookmark_ext = path_map[id].rpartition('.')[2]
myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
bookmarked_books[id] = self.UserAnnotation(type='kindle', bookmark=myBookmark)
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
return bookmarked_books
@ -134,22 +154,25 @@ class Bookmark():
A simple class fetching bookmark data
Kindle-specific
'''
def __init__(self, path, formats, id):
self.book_format = None
def __init__(self, path, id, book_format, bookmark_extension):
self.book_format = book_format
self.bookmark_extension = bookmark_extension
self.book_length = 0
self.id = id
self.last_read = 0
self.last_read_location = 0
self.timestamp = 0
self.user_notes = None
self.get_bookmark_data(path)
self.get_book_length(path, formats)
self.get_book_length(path)
try:
self.percent_read = float(100*self.last_read_location / self.book_length)
self.percent_read = float(100*self.last_read / self.book_length)
except:
self.percent_read = 0
def record(self, n):
from calibre.ebooks.metadata.mobi import StreamSlicer
if n >= self.nrecs:
raise ValueError('non-existent record %r' % n)
offoff = 78 + (8 * n)
@ -159,19 +182,22 @@ class Bookmark():
stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
return StreamSlicer(self.stream, start, stop)
def get_bookmark_data(self, path, fetchUserNotes=True):
def get_bookmark_data(self, path):
''' Return the timestamp and last_read_location '''
from calibre.ebooks.metadata.mobi import StreamSlicer
user_notes = {}
if self.bookmark_extension == 'mbp':
with open(path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.timestamp, = unpack('>I', data[0x24:0x28])
bpar_offset, = unpack('>I', data[0x4e:0x52])
lrlo = bpar_offset + 0x0c
self.last_read_location = int(unpack('>I', data[lrlo:lrlo+4])[0])
self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
self.last_read_location = self.last_read/150 + 1
entries, = unpack('>I', data[0x4a:0x4e])
# Store the annotations/locations
if fetchUserNotes:
bpl = bpar_offset + 4
bpar_len, = unpack('>I', data[bpl:bpl+4])
bpar_len += 8
@ -184,7 +210,6 @@ class Bookmark():
current_entry = 1
sig = data[eo:eo+4]
previous_block = None
user_notes = {}
while sig == 'DATA':
text = None
@ -206,8 +231,11 @@ class Bookmark():
text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
if entry_type:
user_notes[location] = dict(type=entry_type, id=self.id,
text=data[eo+8:eo+8+rec_len].decode('utf-16-be'))
displayed_location = location/150 + 1
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=entry_type,
text=text)
#print " %2d: %s %s" % (current_entry, entry_type,'at %d' % location if location else '')
#if current_block == 'text_block':
#self.textdump(text)
@ -229,34 +257,79 @@ class Bookmark():
# If a bookmark coincides with a user annotation, the locs could
# be the same - cheat by nudging -1
# Skip bookmark for last_read_location
if end_loc != self.last_read_location:
user_notes[end_loc - 1] = dict(type='Bookmark',id=self.id,text=None)
if end_loc != self.last_read:
displayed_location = end_loc/150 + 1
user_notes[end_loc - 1] = dict(id=self.id,
displayed_location=displayed_location,
type='Bookmark',
text=None)
rec_len, = unpack('>I', data[eo+4:eo+8])
eo += rec_len + 8
sig = data[eo:eo+4]
elif self.bookmark_extension == 'tan':
# TAN bookmarks
self.timestamp = os.path.getmtime(path)
with open(path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.last_read = int(unpack('>I', data[5:9])[0])
self.last_read_location = self.last_read/33
entries, = unpack('>I', data[9:13])
current_entry = 0
e_base = 0x0d
while current_entry < entries:
location, = unpack('>I', data[e_base+2:e_base+6])
text = None
text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
e_type, = unpack('>B', data[e_base+1])
if e_type == 0:
e_type = 'Bookmark'
elif e_type == 1:
e_type = 'Highlight'
text = "(Topaz highlights not yet supported)"
elif e_type == 2:
e_type = 'Note'
text = data[e_base+0x10:e_base+0x10+text_len]
else:
e_type = 'Unknown annotation type'
if self.book_format in ['tpz','azw1']:
# *** This needs fine-tuning
displayed_location = location/33
elif self.book_format == 'pdf':
# *** This needs testing
displayed_location = location
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=e_type,
text=text)
if text_len == 0xFFFFFFFF:
e_base = e_base + 14
else:
e_base = e_base + 14 + 2 + text_len
current_entry += 1
for location in user_notes:
if location == self.last_read:
user_notes.pop(location)
break
else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension
self.user_notes = user_notes
'''
for location in sorted(user_notes):
print ' Location %d: %s\n%s' % self.magicKindleLocationCalculator(location),
print ' Location %d: %s\n%s' % (user_notes[location]['displayed_location'],
user_notes[location]['type'],
'\n'.join(self.textdump(user_notes[location]['text'])))
'''
self.user_notes = user_notes
def get_book_length(self, path, formats):
# This assumes only one of the possible formats exists on the Kindle
book_fs = None
for format in formats:
fmt = format.rpartition('.')[2]
book_fs = path.replace('.mbp','.%s' % fmt)
if os.path.exists(book_fs):
self.book_format = fmt
break
else:
#print "no files matching library formats exist on device"
def get_book_length(self, path):
from calibre.ebooks.metadata.mobi import StreamSlicer
book_fs = path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
self.book_length = 0
return
if self.bookmark_extension == 'mbp':
# Read the book len from the header
with open(book_fs,'rb') as f:
self.stream = StringIO(f.read())
@ -264,3 +337,24 @@ class Bookmark():
self.nrecs, = unpack('>H', self.data[76:78])
record0 = self.record(0)
self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
elif self.bookmark_extension == 'tan':
# Read bookLength from metadata
with open(book_fs,'rb') as f:
stream = StringIO(f.read())
raw = stream.read(8*1024)
if not raw.startswith('TPZ'):
raise ValueError('Not a Topaz file')
first = raw.find('metadata')
if first < 0:
raise ValueError('Invalid Topaz file')
second = raw.find('metadata', first+10)
if second < 0:
raise ValueError('Invalid Topaz file')
raw = raw[second:second+1000]
idx = raw.find('bookLength')
if idx > -1:
length = ord(raw[idx+len('bookLength')])
self.book_length = int(raw[idx+len('bookLength')+1:idx+len('bookLength')+1+length])
else:
print "unsupported bookmark_extension: %s" % bookmark_extension

View File

@ -18,7 +18,6 @@ import re
import sys
import glob
from collections import namedtuple
from itertools import repeat
from calibre.devices.interface import DevicePlugin
@ -90,8 +89,6 @@ class Device(DeviceConfig, DevicePlugin):
EBOOK_DIR_CARD_B = ''
DELETE_EXTS = []
# Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
UserAnnotation = namedtuple('Annotation','type, bookmark')
def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None):

View File

@ -53,13 +53,15 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
"x-sjis" : "shift-jis" }
def force_encoding(raw, verbose):
def force_encoding(raw, verbose, assume_utf8=False):
from calibre.constants import preferred_encoding
try:
chardet = detect(raw)
except:
chardet = {'encoding':preferred_encoding, 'confidence':0}
encoding = chardet['encoding']
if chardet['confidence'] < 1 and assume_utf8:
encoding = 'utf-8'
if chardet['confidence'] < 1 and verbose:
print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
if not encoding:
@ -73,7 +75,7 @@ def force_encoding(raw, verbose):
def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
resolve_entities=False):
resolve_entities=False, assume_utf8=False):
'''
Force conversion of byte string to unicode. Tries to look for XML/HTML
encoding declaration first, if not found uses the chardet library and
@ -95,7 +97,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
encoding = match.group(1)
break
if encoding is None:
encoding = force_encoding(raw, verbose)
encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
try:
if encoding.lower().strip() == 'macintosh':
encoding = 'mac-roman'

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, uuid
import os, uuid
from itertools import cycle
from lxml import etree
@ -19,8 +19,7 @@ class EPUBInput(InputFormatPlugin):
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
@classmethod
def decrypt_font(cls, key, path):
def decrypt_font(self, key, path):
raw = open(path, 'rb').read()
crypt = raw[:1024]
key = cycle(iter(key))
@ -29,13 +28,18 @@ class EPUBInput(InputFormatPlugin):
f.write(decrypt)
f.write(raw[1024:])
@classmethod
def process_encryption(cls, encfile, opf, log):
def process_encryption(self, encfile, opf, log):
key = None
m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
if m:
key = m.group(1)
for item in opf.identifier_iter():
scheme = None
for key in item.attrib.keys():
if key.endswith('scheme'):
scheme = item.get(key)
if (scheme and scheme.lower() == 'uuid') or \
(item.text and item.text.startswith('urn:uuid:')):
key = str(item.text).rpartition(':')[-1]
key = list(map(ord, uuid.UUID(key).bytes))
try:
root = etree.parse(encfile)
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
@ -46,7 +50,8 @@ class EPUBInput(InputFormatPlugin):
uri = cr.get('URI')
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
if os.path.exists(path):
cls.decrypt_font(key, path)
self._encrypted_font_uris.append(uri)
self.decrypt_font(key, path)
return True
except:
import traceback
@ -115,14 +120,17 @@ class EPUBInput(InputFormatPlugin):
if opf is None:
raise ValueError('%s is not a valid EPUB file'%path)
if os.path.exists(encfile):
if not self.process_encryption(encfile, opf, log):
raise DRMError(os.path.basename(path))
opf = os.path.relpath(opf, os.getcwdu())
parts = os.path.split(opf)
opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
self._encrypted_font_uris = []
if os.path.exists(encfile):
if not self.process_encryption(encfile, opf, log):
raise DRMError(os.path.basename(path))
self.encrypted_fonts = self._encrypted_font_uris
if len(parts) > 1 and parts[0]:
delta = '/'.join(parts[:-1])+'/'
for elem in opf.itermanifest():

View File

@ -12,8 +12,9 @@ from urllib import unquote
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
from calibre import strftime, guess_type, prepare_string_for_xml
from calibre import strftime, guess_type, prepare_string_for_xml, CurrentDir
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import filesystem_encoding
from lxml import etree
@ -170,6 +171,19 @@ class EPUBOutput(OutputFormatPlugin):
self.workaround_sony_quirks()
from calibre.ebooks.oeb.base import OPF
identifiers = oeb.metadata['identifier']
uuid = None
for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
uuid = unicode(x).split(':')[-1]
break
if uuid is None:
self.log.warn('No UUID identifier found')
from uuid import uuid4
uuid = str(uuid4())
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
with TemporaryDirectory('_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
oeb_output = plugin_for_output_format('oeb')
@ -177,10 +191,16 @@ class EPUBOutput(OutputFormatPlugin):
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
if x.endswith('.ncx')][0])
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
encryption = None
if encrypted_fonts:
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
from calibre.ebooks.epub import initialize_container
epub = initialize_container(output_path, os.path.basename(opf))
epub.add_dir(tdir)
if encryption is not None:
epub.writestr('META-INF/encryption.xml', encryption)
if opts.extract_to is not None:
if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to)
@ -189,6 +209,52 @@ class EPUBOutput(OutputFormatPlugin):
self.log.info('EPUB extracted to', opts.extract_to)
epub.close()
def encrypt_fonts(self, uris, tdir, uuid):
    '''
    XOR the first (up to) 1024 bytes of each listed font file in place and
    build the ``encryption.xml`` text that declares those fonts.

    :param uris: '/'-separated font URIs, resolved relative to ``tdir``
    :param tdir: directory containing the files to process
    :param uuid: identifier whose hexadecimal digits form the 16-byte key
    :return: the encryption manifest as a UTF-8 bytestring, or ``None`` when
             none of the listed files exists
    :raises ValueError: if ``uuid`` contains fewer than 16 hex digits
    '''
    from binascii import unhexlify
    from xml.sax.saxutils import escape

    key = re.sub(r'[^a-fA-F0-9]', '', uuid)
    if len(key) < 16:
        raise ValueError('UUID identifier %r is invalid'%uuid)
    # Doubling then truncating to 32 hex digits always yields 16 key bytes.
    key = unhexlify((key + key)[:32])
    key = tuple(map(ord, key))
    fonts = []
    with CurrentDir(tdir):
        # Going through a dict collapses duplicate URIs; processing the same
        # file twice would XOR it back to its original bytes.
        uris = dict((x, os.path.join(*x.split('/'))) for x in uris)
        for uri in list(uris.keys()):
            path = uris[uri]
            if isinstance(path, unicode):
                path = path.encode(filesystem_encoding)
            if not os.path.exists(path):
                uris.pop(uri)
                continue
            self.log.debug('Encrypting font:', uri)
            with open(path, 'r+b') as f:
                data = f.read(1024)
                f.seek(0)
                # Iterate over what was actually read: a font shorter than
                # 1024 bytes used to raise IndexError. This mirrors
                # decrypt_font(), which processes raw[:1024].
                f.write(''.join(chr(ord(c) ^ key[i % 16])
                                for i, c in enumerate(data)))
            if not isinstance(uri, unicode):
                uri = uri.decode('utf-8')
            # Real XML attribute escaping; a backslash before '"' is not an
            # escape in XML and produced a malformed manifest.
            fonts.append(u'''
<enc:EncryptedData>
<enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>
<enc:CipherData>
<enc:CipherReference URI="%s"/>
</enc:CipherData>
</enc:EncryptedData>
'''%(escape(uri, {'"': '&quot;'})))
    if not fonts:
        return None
    ans = '''<encryption
xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
xmlns:enc="http://www.w3.org/2001/04/xmlenc#"
xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">
'''
    ans += (u'\n'.join(fonts)).encode('utf-8')
    ans += '\n</encryption>'
    return ans
def default_cover(self):
'''
Create a generic cover for books that dont have a cover

View File

@ -779,6 +779,9 @@ class OPF(object):
self.set_text(matches[0], unicode(val))
return property(fget=fget, fset=fset)
def identifier_iter(self):
    '''
    Generate, one at a time, each item produced by applying
    ``self.identifier_path`` to ``self.metadata``.
    '''
    matches = self.identifier_path(self.metadata)
    for element in matches:
        yield element
def guess_cover(self):
'''

View File

@ -149,7 +149,8 @@ class TOC(list):
def read_ncx_toc(self, toc):
self.base_path = os.path.dirname(toc)
soup = NCXSoup(xml_to_unicode(open(toc, 'rb').read())[0])
raw = xml_to_unicode(open(toc, 'rb').read(), assume_utf8=True)[0]
soup = NCXSoup(raw)
def process_navpoint(np, dest):
play_order = np.get('playOrder', None)
@ -160,7 +161,7 @@ class TOC(list):
if nl is not None:
text = u''
for txt in nl.findAll(re.compile('text')):
text += ''.join([unicode(s) for s in txt.findAll(text=True)])
text += u''.join([unicode(s) for s in txt.findAll(text=True)])
content = np.find(re.compile('content'))
if content is None or not content.has_key('src') or not txt:
return

View File

@ -152,13 +152,17 @@ class EbookIterator(object):
prints('Substituting font family: %s -> %s'%(bad, good))
return match.group().replace(bad, '"%s"'%good)
from calibre.ebooks.chardet import force_encoding
for csspath in css_files:
with open(csspath, 'r+b') as f:
css = f.read()
css = font_family_pat.sub(prepend_embedded_font, css)
enc = force_encoding(css, False)
css = css.decode(enc, 'replace')
ncss = font_family_pat.sub(prepend_embedded_font, css)
if ncss != css:
f.seek(0)
f.truncate()
f.write(css)
f.write(ncss.encode(enc))
def __enter__(self, processed=False):
self.delete_on_exit = []

View File

@ -331,7 +331,10 @@ class OEBReader(object):
id = child.get('id')
klass = child.get('class', 'chapter')
try:
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
except:
po = self.oeb.toc.next_play_order()
authorElement = xpath(child,
'descendant::calibre:meta[@name = "author"]')

View File

@ -27,7 +27,7 @@ class Reader(FormatReader):
self.log.info('Extracting PDF...')
with TemporaryFile() as pdf_n:
pdf = open(pdf_n, 'rw+b')
pdf = open(pdf_n, 'rwb')
for x in xrange(self.header.section_count()):
pdf.write(self.header.section_data(x))

View File

@ -18,6 +18,7 @@ from calibre.gui2 import config as gconf, error_dialog
from calibre.web.feeds.recipes.model import RecipeModel
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import utcnow
from calibre.utils.network import internet_connected
class SchedulerDialog(QDialog, Ui_Dialog):
@ -304,6 +305,8 @@ class Scheduler(QObject):
self.download(urn)
def download(self, urn):
if not internet_connected():
return
self.lock.lock()
doit = urn not in self.download_queue
self.lock.unlock()

View File

@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
'''The main GUI'''
import collections, datetime, os, shutil, sys, textwrap, time
from collections import namedtuple
from xml.parsers.expat import ExpatError
from Queue import Queue, Empty
from threading import Thread
@ -20,7 +19,7 @@ from PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \
QToolButton, QDialog, QDesktopServices, QFileDialog, \
QSystemTrayIcon, QApplication, QKeySequence, QAction, \
QMessageBox, QStackedLayout, QHelpEvent, QInputDialog,\
QThread
QThread, pyqtSignal
from PyQt4.QtSvg import QSvgRenderer
from calibre import prints, patheq, strftime
@ -57,7 +56,6 @@ from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.gui2.dialogs.book_info import BookInfo
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
from calibre.library.cli import send_message as calibre_send_message
from calibre.library.database2 import LibraryDatabase2, CoverCache
from calibre.gui2.dialogs.confirm_delete import confirm
@ -928,9 +926,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
######################### Fetch annotations ################################
def fetch_annotations(self, *args):
# Figure out a list of ids using the same logic as the catalog generation
# FUnction. Use the currently connected device to map ids to paths
# Generate a path_map from selected ids
def get_ids_from_selected_rows():
rows = self.library_view.selectionModel().selectedRows()
if not rows or len(rows) < 2:
@ -938,15 +934,22 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
ids = map(self.library_view.model().id, rows)
return ids
def get_formats(id):
    # Look the book up in `db` and return, for each entry of its 'formats'
    # list, the text after the last '.' (the whole entry if it has no '.').
    record = db.get_data_as_dict(ids=[id])[0]
    return [entry.rpartition('.')[2] for entry in record['formats']]
def generate_annotation_paths(ids, db, device):
# Generate a dict {1:'documents/documents/Asimov, Isaac/Foundation - Isaac Asimov.epub'}
# These are the not the absolute paths - individual storage mount points will need to be
# prepended during the search
# Generate path templates
# Individual storage mount points scanned/resolved in driver.get_annotations()
path_map = {}
for id in ids:
mi = db.get_metadata(id, index_is_id=True)
a_path = device.create_upload_path(os.path.abspath('/<storage>'), mi, 'x.mbp', create_dirs=False)
path_map[id] = a_path
a_path = device.create_upload_path(os.path.abspath('/<storage>'), mi, 'x.bookmark', create_dirs=False)
path_map[id] = dict(path=a_path, fmts=get_formats(id))
return path_map
device = self.device_manager.device
@ -976,16 +979,24 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
from calibre.gui2.dialogs.progress import ProgressDialog
class Updater(QThread):
def __init__(self, parent, db, annotation_map):
update_progress = pyqtSignal(int)
update_done = pyqtSignal()
def __init__(self, parent, db, annotation_map, done_callback):
QThread.__init__(self, parent)
self.db = db
self.pd = ProgressDialog(_('Merging user annotations into database'), '',
0, len(job.result), parent=parent)
self.am = annotation_map
self.done_callback = done_callback
self.connect(self.pd, SIGNAL('canceled()'), self.canceled)
self.pd.setModal(True)
self.pd.show()
self.update_progress.connect(self.pd.set_value,
type=Qt.QueuedConnection)
self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)
def generate_annotation_html(self, bookmark):
# Returns <div class="user_annotations"> ... </div>
@ -1003,8 +1014,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
spanTag['style'] = 'font-weight:bold'
spanTag.insert(0,NavigableString("%s<br />Last Page Read: Location %d (%d%%)" % \
(strftime(u'%x', timestamp.timetuple()),
last_read_location/150 + 1,
percent_read)))
last_read_location, percent_read)))
divTag.insert(dtc, spanTag)
dtc += 1
@ -1020,14 +1030,14 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
for location in sorted(user_notes):
if user_notes[location]['text']:
annotations.append('<b>Location %d &bull; %s</b><br />%s<br />' % \
(location/150 + 1,
(user_notes[location]['displayed_location'],
user_notes[location]['type'],
user_notes[location]['text'] if \
user_notes[location]['type'] == 'Note' else \
'<i>%s</i>' % user_notes[location]['text']))
else:
annotations.append('<b>Location %d &bull; %s</b><br />' % \
(location/150 + 1,
(user_notes[location]['displayed_location'],
user_notes[location]['type']))
for annotation in annotations:
@ -1040,13 +1050,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def canceled(self):
self.pd.hide()
def start(self):
QApplication.processEvents()
def run(self):
for (i, id) in enumerate(self.am):
bm = Device.UserAnnotation(self.am[id][0],self.am[id][1])
user_notes_soup = self.generate_annotation_html(bm.bookmark)
mi = self.db.get_metadata(id, index_is_id=True)
if mi.comments:
a_offset = mi.comments.find('<div class="user_annotations">')
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
@ -1060,13 +1070,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
user_notes_soup.insert(0,hrTag)
mi.comments += user_notes_soup.prettify()
else:
mi.comments = unicode(user_notes_soup.prettify())
# Update library comments
self.db.set_comment(id, mi.comments)
self.pd.set_value(i)
self.pd.hide()
calibre_send_message()
self.update_progress.emit(i)
self.update_done.emit()
self.done_callback(self.am.keys())
if not job.result: return
@ -1076,9 +1086,9 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
show=True)
db = self.library_view.model().db
self.__annotation_updater = Updater(self, db, job.result)
self.__annotation_updater = Updater(self, db, job.result,
Dispatcher(self.library_view.model().refresh_ids))
self.__annotation_updater.start()
return
############################################################################

View File

@ -1175,14 +1175,18 @@ class EPUB_MOBI(CatalogPlugin):
Preferences|Add/Save|Sending to device, not a customized one specified in
the Kindle plugin
'''
from cStringIO import StringIO
from struct import unpack
from calibre.devices.usbms.device import Device
from calibre.devices.kindle.driver import Bookmark
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.mobi import StreamSlicer
# Book formats grouped by the bookmark file type that accompanies them:
# books in MBP_FORMATS are looked up with an 'mbp' bookmark extension,
# books in TAN_FORMATS with a 'tan' bookmark extension.
MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt']
TAN_FORMATS = [u'tpz', u'azw1']
# Set versions of the lists above, used for intersection tests
mbp_formats = set(MBP_FORMATS)
tan_formats = set(TAN_FORMATS)
class BookmarkDevice(Device):
def initialize(self, save_template):
@ -1191,6 +1195,40 @@ class EPUB_MOBI(CatalogPlugin):
def save_template(self):
return self._save_template
def resolve_bookmark_paths(storage, path_map):
    '''
    Resolve bookmark path templates against the available storage volumes.

    storage:  iterable of storage mount points to search
    path_map: {id: dict(path=<template>, fmts=[<format>, ...])} where
              <template> is rooted at os.path.abspath('/<storage>') and ends
              with the placeholder extension '.bookmark'

    Returns ``(path_map, book_ext)``. ``path_map`` is modified in place: for
    every book whose bookmark file exists on one of the volumes the entry is
    replaced by the path of that file, all other entries are removed.
    ``book_ext`` maps the surviving ids to the book format that selected the
    bookmark type.
    '''
    template_root = os.path.abspath('/<storage>')
    placeholder = '.bookmark'
    unresolved = []
    book_ext = {}
    for book_id in path_map:
        file_fmts = set(path_map[book_id]['fmts'])
        mbp_matches = file_fmts.intersection(mbp_formats)
        tan_matches = file_fmts.intersection(tan_formats)

        # mbp formats take precedence over tan formats
        if mbp_matches:
            book_extension = list(mbp_matches)[0]
            bookmark_extension = 'mbp'
        elif tan_matches:
            book_extension = list(tan_matches)[0]
            bookmark_extension = 'tan'
        else:
            # No format of this book has an associated bookmark type
            unresolved.append(book_id)
            continue

        template = path_map[book_id]['path']
        # Swap only the trailing placeholder extension. A blanket
        # str.replace('bookmark', ...) would also mangle titles, authors or
        # mount points that happen to contain the word 'bookmark'.
        if template.endswith(placeholder):
            template = template[:-len(placeholder)] + '.' + bookmark_extension

        for vol in storage:
            bkmk_path = template.replace(template_root, vol)
            if os.path.exists(bkmk_path):
                path_map[book_id] = bkmk_path
                book_ext[book_id] = book_extension
                break
        else:
            # Bookmark file not found on any volume
            unresolved.append(book_id)

    # Remove non-existent bookmark templates
    for book_id in unresolved:
        path_map.pop(book_id)
    return path_map, book_ext
if self.generateRecentlyRead:
self.opts.log.info(" Collecting Kindle bookmarks matching catalog entries")
@ -1199,26 +1237,32 @@ class EPUB_MOBI(CatalogPlugin):
bookmarks = {}
for book in self.booksByTitle:
if 'formats' in book:
path_map = {}
id = book['id']
original_title = book['title'][book['title'].find(':') + 2:] if book['series'] \
else book['title']
myMeta = MetaInformation(original_title,
authors=book['authors'])
myMeta.author_sort = book['author_sort']
bm_found = False
for vol in self.opts.connected_device['storage']:
bm_path = d.create_upload_path(vol, myMeta, 'x.mbp', create_dirs=False)
if os.path.exists(bm_path):
myBookmark = Bookmark(bm_path, book['formats'], book['id'])
a_path = d.create_upload_path('/<storage>', myMeta, 'x.bookmark', create_dirs=False)
path_map[id] = dict(path=a_path, fmts=[x.rpartition('.')[2] for x in book['formats']])
path_map, book_ext = resolve_bookmark_paths(self.opts.connected_device['storage'], path_map)
if path_map:
bookmark_ext = path_map[id].rpartition('.')[2]
myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
print "book: %s\nlast_read_location: %d\nlength: %d" % (book['title'],
myBookmark.last_read_location,
myBookmark.book_length)
if myBookmark.book_length:
book['percent_read'] = float(100*myBookmark.last_read_location / myBookmark.book_length)
dots = int((book['percent_read'] + 5)/10)
dot_string = self.READ_PROGRESS_SYMBOL * dots
empty_dots = self.UNREAD_PROGRESS_SYMBOL * (10 - dots)
book['reading_progress'] = '%s%s' % (dot_string,empty_dots)
bookmarks[book['id']] = ((myBookmark,book))
bm_found = True
if bm_found:
break
bookmarks[id] = ((myBookmark,book))
self.bookmarked_books = bookmarks
else:
self.bookmarked_books = {}

View File

@ -20,10 +20,10 @@ try:
except ImportError:
import Image as PILImage
from calibre.constants import __version__, __appname__
from calibre.constants import __version__, __appname__, iswindows
from calibre.utils.genshi.template import MarkupTemplate
from calibre import fit_image, guess_type, prepare_string_for_xml, \
strftime as _strftime, prints
strftime as _strftime
from calibre.library import server_config as config
from calibre.library.database2 import LibraryDatabase2, FIELD_MAP
from calibre.utils.config import config_dir
@ -423,10 +423,8 @@ class LibraryServer(object):
self.opts.port, {'path':'/stanza'})
except:
import traceback
print 'Failed to start BonJour:'
cherrypy.log('Failed to start BonJour:')
cherrypy.log(traceback.format_exc())
traceback.print_exc()
cherrypy.log.error('Failed to start BonJour:')
cherrypy.log.error(traceback.format_exc())
cherrypy.engine.block()
except Exception, e:
self.exception = e
@ -436,10 +434,8 @@ class LibraryServer(object):
stop_zeroconf()
except:
import traceback
print 'Failed to stop BonJour:'
cherrypy.log('Failed to stop BonJour:')
cherrypy.log(traceback.format_exc())
traceback.print_exc()
cherrypy.log.error('Failed to stop BonJour:')
cherrypy.log.error(traceback.format_exc())
def exit(self):
cherrypy.engine.exit()
@ -472,7 +468,8 @@ class LibraryServer(object):
return of.getvalue()
except Exception, err:
import traceback
traceback.print_exc()
cherrypy.log.error('Failed to generate cover:')
# format_exc() returns the traceback as a string; print_exc() writes it to
# stderr and returns None, which would put 'None' in the log
cherrypy.log.error(traceback.format_exc())
raise cherrypy.HTTPError(404, 'Failed to generate cover: %s'%err)
def get_format(self, id, format):
@ -813,7 +810,7 @@ class LibraryServer(object):
# A better search would be great
want_mobile = self.MOBILE_UA.search(ua) is not None
if self.opts.develop and not want_mobile:
prints('User agent:', ua)
cherrypy.log('User agent: '+ua)
if want_opds:
return self.stanza(search=kwargs.get('search', None), sortby=kwargs.get('sortby',None), authorid=kwargs.get('authorid',None),
@ -882,12 +879,55 @@ def option_parser():
parser = config().option_parser('%prog '+ _('[options]\n\nStart the calibre content server.'))
parser.add_option('--with-library', default=None,
help=_('Path to the library folder to serve with the content server'))
parser.add_option('--pidfile', default=None,
help=_('Write process PID to the specified file'))
parser.add_option('--daemonize', default=False, action='store_true',
help='Run process in background as a daemon. No effect on windows.')
return parser
def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
    '''
    Detach the current process from its parent by forking twice, then
    redirect the standard streams.

    stdin, stdout, stderr: paths of the files that the corresponding standard
    file descriptors are redirected to. stdin is opened for reading, stdout
    and stderr for appending.

    Each parent process leaves via ``sys.exit(0)``; only the final child
    returns from this function. If a fork fails, a message is written to
    stderr and the process exits with status 1.
    '''
    def fork_and_exit_parent(attempt):
        try:
            pid = os.fork()
            if pid > 0:
                # exit from the parent, the child carries on
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("fork #%d failed: %d (%s)\n" % (attempt, e.errno, e.strerror))
            sys.exit(1)

    # Flush before forking: otherwise text still sitting in the stream buffers
    # is inherited by the child and written a second time later on.
    sys.stdout.flush()
    sys.stderr.flush()

    fork_and_exit_parent(1)

    # decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)

    # do second fork
    fork_and_exit_parent(2)

    # Redirect standard file descriptors.
    sys.stdout.flush()
    sys.stderr.flush()
    si = open(stdin, 'r')
    so = open(stdout, 'a+')
    se = open(stderr, 'a+', 0)  # third argument 0: unbuffered
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if opts.daemonize and not iswindows:
daemonize()
if opts.pidfile is not None:
with open(opts.pidfile, 'wb') as f:
f.write(str(os.getpid()))
cherrypy.log.screen = True
from calibre.utils.config import prefs
if opts.with_library is None:

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.constants import iswindows, islinux, isfreebsd
class LinuxNetworkStatus(object):
    '''
    Callable reporting whether NetworkManager has any active connections.

    The check is best effort: if NetworkManager cannot be reached over the
    D-Bus system bus, or querying it fails, the network is reported as
    connected.
    '''

    def __init__(self):
        try:
            import dbus
            bus = dbus.SystemBus()
            proxy = bus.get_object("org.freedesktop.NetworkManager",
                        "/org/freedesktop/NetworkManager")
            self.manager = dbus.Interface(proxy, "org.freedesktop.DBus.Properties")
        except Exception:
            # dbus missing or NetworkManager unavailable. Only ordinary errors
            # are swallowed, KeyboardInterrupt/SystemExit still propagate.
            self.manager = None

    def __call__(self):
        '''Return False only when NetworkManager reports no active connections.'''
        if self.manager is None:
            return True
        try:
            connections = self.manager.Get("org.freedesktop.NetworkManager",
                        "ActiveConnections")
            return len(connections) > 0
        except Exception:
            # Query failed: assume connected rather than block network use
            return True
class WindowsNetworkStatus(object):
    '''
    Callable reporting connectivity via the winutil plugin's
    internet_connected() function.
    '''

    def __init__(self):
        from calibre.constants import plugins
        # First element of the plugin entry; __call__ treats None as "not available"
        self.winutil = plugins['winutil'][0]

    def __call__(self):
        '''Return winutil.internet_connected(), or True if winutil is None.'''
        winutil = self.winutil
        if winutil is not None:
            return winutil.internet_connected()
        return True
class DummyNetworkStatus(object):
    '''Callable that performs no check and always reports True.'''

    def __call__(self):
        return True
# Select the network status checker for the running platform, once, at import
if iswindows:
    _network_status = WindowsNetworkStatus()
elif islinux or isfreebsd:
    _network_status = LinuxNetworkStatus()
else:
    _network_status = DummyNetworkStatus()
def internet_connected():
    '''Return the result of calling the module-level network status checker.'''
    return _network_status()

View File

@ -51,11 +51,15 @@ wherever possible in this module.
script being run. So to replace sys.argv, you should use
`if len(sys.argv) > 1: sys.argv[1:] = winutil.argv()[1-len(sys.argv):]`
.. function:: internet_connected() -> Return True if there is an active
internet connection.
*/
#define UNICODE
#include <Windows.h>
#include <Wininet.h>
#include <Python.h>
#include <structseq.h>
#include <timefuncs.h>
@ -771,6 +775,15 @@ gettmarg(PyObject *args, struct tm *p)
return 1;
}
static PyObject *
winutil_internet_connected(PyObject *self, PyObject *args) {
    /* InternetGetConnectedState() needs somewhere to store the connection
       flags; only its return value is used here. */
    DWORD flags;

    if (!InternetGetConnectedState(&flags, 0)) {
        Py_RETURN_FALSE;
    }
    Py_RETURN_TRUE;
}
static PyObject *
winutil_strftime(PyObject *self, PyObject *args)
{
@ -919,6 +932,10 @@ be a unicode string. Returns unicode strings."
"eject_drive(drive_letter)\n\nEject a drive. Raises an exception on failure."
},
{"internet_connected", winutil_internet_connected, METH_VARARGS,
"internet_connected()\n\nReturn True if there is an active internet connection"
},
{NULL, NULL, 0, NULL}
};