Pull from driver-dev

This commit is contained in:
Kovid Goyal 2009-06-20 19:46:33 -07:00
commit 2d2fadcbd6
7 changed files with 86 additions and 56 deletions

View File

@ -403,9 +403,8 @@ def fix_ids(main, carda, cardb):
for child in db.root_element.childNodes: for child in db.root_element.childNodes:
if child.nodeType == child.ELEMENT_NODE and child.hasAttribute('id'): if child.nodeType == child.ELEMENT_NODE and child.hasAttribute('id'):
id_map[child.getAttribute('id')] = str(cid) id_map[child.getAttribute('id')] = str(cid)
child.setAttribute('sourceid', '1') child.setAttribute("sourceid",
#child.setAttribute("sourceid", '0' if getattr(child, 'tagName', '').endswith('playlist') else '1')
# '0' if getattr(child, 'tagName', '').endswith('playlist') else '1')
child.setAttribute('id', str(cid)) child.setAttribute('id', str(cid))
cid += 1 cid += 1

View File

@ -180,16 +180,25 @@ class PRS505(CLI, Device):
return zip(paths, sizes, ctimes, cycle([on_card])) return zip(paths, sizes, ctimes, cycle([on_card]))
@classmethod def add_books_to_metadata(self, locations, metadata, booklists):
def add_books_to_metadata(cls, locations, metadata, booklists):
metadata = iter(metadata) metadata = iter(metadata)
for location in locations: for location in locations:
info = metadata.next() info = metadata.next()
path = location[0] path = location[0]
blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0 blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0
name = path.rpartition(os.sep)[2]
name = (cls.CARD_PATH_PREFIX+'/' if blist else 'database/media/books/') + name if path.startswith(self._main_prefix):
name = path.replace(self._main_prefix, '')
elif path.startswith(self._card_a_prefix):
name = path.replace(self._card_a_prefix, '')
elif path.startswith(self._card_b_prefix):
name = path.replace(self._card_b_prefix, '')
name = name.replace('\\', '/')
name = name.replace('//', '/') name = name.replace('//', '/')
if name.startswith('/'):
name = name[1:]
booklists[blist].add_book(info, name, *location[1:-1]) booklists[blist].add_book(info, name, *location[1:-1])
fix_ids(*booklists) fix_ids(*booklists)

View File

@ -1,12 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, functools import functools
import re
from calibre import entity_to_unicode from calibre import entity_to_unicode
@ -52,7 +52,12 @@ def line_length(raw, percent):
for line in lines: for line in lines:
if len(line) > 0: if len(line) > 0:
lengths.append(len(line)) lengths.append(len(line))
if not lengths:
return 0
total = sum(lengths) total = sum(lengths)
print total
avg = total / len(lengths) avg = total / len(lengths)
max_line = avg * 2 max_line = avg * 2
@ -178,9 +183,12 @@ class HTMLPreProcessor(object):
elif self.is_book_designer(html): elif self.is_book_designer(html):
rules = self.BOOK_DESIGNER rules = self.BOOK_DESIGNER
elif self.is_pdftohtml(html): elif self.is_pdftohtml(html):
length = line_length(html, .3)
line_length_rules = []
if length:
line_length_rules = [ line_length_rules = [
# Un wrap using punctuation # Un wrap using punctuation
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines), (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines),
] ]
rules = self.PDFTOHTML + line_length_rules rules = self.PDFTOHTML + line_length_rules

View File

@ -82,10 +82,6 @@ class FB2MLizer(object):
return images return images
def clean_text(self, text): def clean_text(self, text):
for entity in set(re.findall('&.+?;', text)):
mo = re.search('(%s)' % entity[1:-1], text)
text = text.replace(entity, entity_to_unicode(mo))
text = text.replace('&', '') text = text.replace('&', '')
return text return text

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
import os import os
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.pdf.pdftohtml import pdftohtml from calibre.ebooks.pdf.pdftohtml import pdftohtml
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
@ -17,17 +17,33 @@ class PDFInput(InputFormatPlugin):
description = 'Convert PDF files to HTML' description = 'Convert PDF files to HTML'
file_types = set(['pdf']) file_types = set(['pdf'])
options = set([
OptionRecommendation(name='no_images', recommended_value=False,
help=_('Do not extract images from the document')),
])
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
html = pdftohtml(stream.name) # The main html file will be named index.html
pdftohtml(os.getcwd(), stream.name, options.no_images)
with open('index.html', 'wb') as index:
index.write(html)
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
mi = get_metadata(stream, 'pdf') mi = get_metadata(stream, 'pdf')
opf = OPFCreator(os.getcwd(), mi) opf = OPFCreator(os.getcwd(), mi)
opf.create_manifest([('index.html', None)])
manifest = [('index.html', None)]
images = os.listdir(os.getcwd())
images.remove('index.html')
for i in images:
# Remove the - from the file name because it causes problems.
# The reference to the image with the - will be changed to not
# include it later in the conversion process.
new_i = i.replace('-', '')
os.rename(i, new_i)
manifest.append((new_i, None))
opf.create_manifest(manifest)
opf.create_spine(['index.html']) opf.create_spine(['index.html'])
with open('metadata.opf', 'wb') as opffile: with open('metadata.opf', 'wb') as opffile:
opf.render(opffile) opf.render(opffile)

View File

@ -14,7 +14,6 @@ from functools import partial
from calibre.ebooks import ConversionError, DRMError from calibre.ebooks import ConversionError, DRMError
from calibre import isosx, iswindows, islinux from calibre import isosx, iswindows, islinux
from calibre import CurrentDir from calibre import CurrentDir
from calibre.ptempfile import TemporaryDirectory
PDFTOHTML = 'pdftohtml' PDFTOHTML = 'pdftohtml'
popen = subprocess.Popen popen = subprocess.Popen
@ -26,10 +25,11 @@ if iswindows and hasattr(sys, 'frozen'):
if islinux and getattr(sys, 'frozen_path', False): if islinux and getattr(sys, 'frozen_path', False):
PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml') PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
def pdftohtml(pdf_path): def pdftohtml(output_dir, pdf_path, no_images):
''' '''
Convert the pdf into html using the pdftohtml app. Convert the pdf into html using the pdftohtml app.
@return: The HTML as a unicode string. This will write the html as index.html into output_dir.
It will also write all extracted images to the output_dir.
''' '''
if isinstance(pdf_path, unicode): if isinstance(pdf_path, unicode):
@ -37,14 +37,14 @@ def pdftohtml(pdf_path):
if not os.access(pdf_path, os.R_OK): if not os.access(pdf_path, os.R_OK):
raise ConversionError('Cannot read from ' + pdf_path) raise ConversionError('Cannot read from ' + pdf_path)
with TemporaryDirectory('_pdftohtml') as tdir: with CurrentDir(output_dir):
index = os.path.join(tdir, 'index.html') index = os.path.join(os.getcwd(), 'index.html')
# This is necessary as pdftohtml doesn't always (linux) respect absolute paths # This is necessary as pdftohtml doesn't always (linux) respect absolute paths
pdf_path = os.path.abspath(pdf_path) pdf_path = os.path.abspath(pdf_path)
cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-i', '-q', pdf_path, os.path.basename(index)) cmd = [PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q', pdf_path, os.path.basename(index)]
cwd = os.getcwd() if no_images:
cmd.append('-i')
with CurrentDir(tdir):
try: try:
p = popen(cmd, stderr=subprocess.PIPE) p = popen(cmd, stderr=subprocess.PIPE)
except OSError, err: except OSError, err:
@ -69,9 +69,9 @@ def pdftohtml(pdf_path):
if not os.path.exists(index) or os.stat(index).st_size < 100: if not os.path.exists(index) or os.stat(index).st_size < 100:
raise DRMError() raise DRMError()
with open(index, 'rb') as i: with open(index, 'rb+wb') as i:
raw = i.read() raw = i.read()
if not '<br' in raw[:4000]: raw = '<!-- created by calibre\'s pdftohtml -->\n' + raw
raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.')) i.seek(0)
i.truncate()
return '<!-- created by calibre\'s pdftohtml -->\n' + raw i.write(raw)

View File

@ -227,6 +227,7 @@ class LocationModel(QAbstractListModel):
self.free[1] = fs[1] if fs[1] is not None and cpa is not None else -1 self.free[1] = fs[1] if fs[1] is not None and cpa is not None else -1
self.free[2] = fs[2] if fs[2] is not None and cpb is not None else -1 self.free[2] = fs[2] if fs[2] is not None and cpb is not None else -1
self.reset() self.reset()
self.emit(SIGNAL('devicesChanged()'))
def location_changed(self, row): def location_changed(self, row):
self.highlight_row = row self.highlight_row = row
@ -253,6 +254,7 @@ class LocationView(QListView):
self.connect(self, SIGNAL('entered(QModelIndex)'), self.item_entered) self.connect(self, SIGNAL('entered(QModelIndex)'), self.item_entered)
self.connect(self, SIGNAL('viewportEntered()'), self.viewport_entered) self.connect(self, SIGNAL('viewportEntered()'), self.viewport_entered)
self.connect(self.eject_button, SIGNAL('clicked()'), lambda: self.emit(SIGNAL('umount_device()'))) self.connect(self.eject_button, SIGNAL('clicked()'), lambda: self.emit(SIGNAL('umount_device()')))
self.connect(self.model(), SIGNAL('devicesChanged()'), self.eject_button.hide)
def count_changed(self, new_count): def count_changed(self, new_count):
self.model().count = new_count self.model().count = new_count