mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Pull from driver-dev
This commit is contained in:
commit
2d2fadcbd6
@ -403,9 +403,8 @@ def fix_ids(main, carda, cardb):
|
||||
for child in db.root_element.childNodes:
|
||||
if child.nodeType == child.ELEMENT_NODE and child.hasAttribute('id'):
|
||||
id_map[child.getAttribute('id')] = str(cid)
|
||||
child.setAttribute('sourceid', '1')
|
||||
#child.setAttribute("sourceid",
|
||||
# '0' if getattr(child, 'tagName', '').endswith('playlist') else '1')
|
||||
child.setAttribute("sourceid",
|
||||
'0' if getattr(child, 'tagName', '').endswith('playlist') else '1')
|
||||
child.setAttribute('id', str(cid))
|
||||
cid += 1
|
||||
|
||||
|
@ -180,16 +180,25 @@ class PRS505(CLI, Device):
|
||||
|
||||
return zip(paths, sizes, ctimes, cycle([on_card]))
|
||||
|
||||
@classmethod
|
||||
def add_books_to_metadata(cls, locations, metadata, booklists):
|
||||
def add_books_to_metadata(self, locations, metadata, booklists):
|
||||
metadata = iter(metadata)
|
||||
for location in locations:
|
||||
info = metadata.next()
|
||||
path = location[0]
|
||||
blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0
|
||||
name = path.rpartition(os.sep)[2]
|
||||
name = (cls.CARD_PATH_PREFIX+'/' if blist else 'database/media/books/') + name
|
||||
|
||||
if path.startswith(self._main_prefix):
|
||||
name = path.replace(self._main_prefix, '')
|
||||
elif path.startswith(self._card_a_prefix):
|
||||
name = path.replace(self._card_a_prefix, '')
|
||||
elif path.startswith(self._card_b_prefix):
|
||||
name = path.replace(self._card_b_prefix, '')
|
||||
|
||||
name = name.replace('\\', '/')
|
||||
name = name.replace('//', '/')
|
||||
if name.startswith('/'):
|
||||
name = name[1:]
|
||||
|
||||
booklists[blist].add_book(info, name, *location[1:-1])
|
||||
fix_ids(*booklists)
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, functools
|
||||
import functools
|
||||
import re
|
||||
|
||||
from calibre import entity_to_unicode
|
||||
|
||||
@ -52,7 +52,12 @@ def line_length(raw, percent):
|
||||
for line in lines:
|
||||
if len(line) > 0:
|
||||
lengths.append(len(line))
|
||||
|
||||
if not lengths:
|
||||
return 0
|
||||
|
||||
total = sum(lengths)
|
||||
print total
|
||||
avg = total / len(lengths)
|
||||
max_line = avg * 2
|
||||
|
||||
@ -178,10 +183,13 @@ class HTMLPreProcessor(object):
|
||||
elif self.is_book_designer(html):
|
||||
rules = self.BOOK_DESIGNER
|
||||
elif self.is_pdftohtml(html):
|
||||
line_length_rules = [
|
||||
# Un wrap using punctuation
|
||||
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines),
|
||||
]
|
||||
length = line_length(html, .3)
|
||||
line_length_rules = []
|
||||
if length:
|
||||
line_length_rules = [
|
||||
# Un wrap using punctuation
|
||||
(re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % length, re.UNICODE), wrap_lines),
|
||||
]
|
||||
|
||||
rules = self.PDFTOHTML + line_length_rules
|
||||
else:
|
||||
|
@ -82,10 +82,6 @@ class FB2MLizer(object):
|
||||
return images
|
||||
|
||||
def clean_text(self, text):
|
||||
for entity in set(re.findall('&.+?;', text)):
|
||||
mo = re.search('(%s)' % entity[1:-1], text)
|
||||
text = text.replace(entity, entity_to_unicode(mo))
|
||||
|
||||
text = text.replace('&', '')
|
||||
|
||||
return text
|
||||
|
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.pdf.pdftohtml import pdftohtml
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
|
||||
@ -17,17 +17,33 @@ class PDFInput(InputFormatPlugin):
|
||||
description = 'Convert PDF files to HTML'
|
||||
file_types = set(['pdf'])
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='no_images', recommended_value=False,
|
||||
help=_('Do not extract images from the document')),
|
||||
])
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
html = pdftohtml(stream.name)
|
||||
|
||||
with open('index.html', 'wb') as index:
|
||||
index.write(html)
|
||||
# The main html file will be named index.html
|
||||
pdftohtml(os.getcwd(), stream.name, options.no_images)
|
||||
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
mi = get_metadata(stream, 'pdf')
|
||||
opf = OPFCreator(os.getcwd(), mi)
|
||||
opf.create_manifest([('index.html', None)])
|
||||
|
||||
manifest = [('index.html', None)]
|
||||
|
||||
images = os.listdir(os.getcwd())
|
||||
images.remove('index.html')
|
||||
for i in images:
|
||||
# Remove the - from the file name because it causes problems.
|
||||
# The reference to the image with the - will be changed to not
|
||||
# include it later in the conversion process.
|
||||
new_i = i.replace('-', '')
|
||||
os.rename(i, new_i)
|
||||
manifest.append((new_i, None))
|
||||
opf.create_manifest(manifest)
|
||||
|
||||
opf.create_spine(['index.html'])
|
||||
with open('metadata.opf', 'wb') as opffile:
|
||||
opf.render(opffile)
|
||||
|
@ -14,7 +14,6 @@ from functools import partial
|
||||
from calibre.ebooks import ConversionError, DRMError
|
||||
from calibre import isosx, iswindows, islinux
|
||||
from calibre import CurrentDir
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
|
||||
PDFTOHTML = 'pdftohtml'
|
||||
popen = subprocess.Popen
|
||||
@ -26,10 +25,11 @@ if iswindows and hasattr(sys, 'frozen'):
|
||||
if islinux and getattr(sys, 'frozen_path', False):
|
||||
PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
|
||||
|
||||
def pdftohtml(pdf_path):
|
||||
def pdftohtml(output_dir, pdf_path, no_images):
|
||||
'''
|
||||
Convert the pdf into html using the pdftohtml app.
|
||||
@return: The HTML as a unicode string.
|
||||
This will write the html as index.html into output_dir.
|
||||
It will also write all extracted images to the output_dir
|
||||
'''
|
||||
|
||||
if isinstance(pdf_path, unicode):
|
||||
@ -37,41 +37,41 @@ def pdftohtml(pdf_path):
|
||||
if not os.access(pdf_path, os.R_OK):
|
||||
raise ConversionError('Cannot read from ' + pdf_path)
|
||||
|
||||
with TemporaryDirectory('_pdftohtml') as tdir:
|
||||
index = os.path.join(tdir, 'index.html')
|
||||
with CurrentDir(output_dir):
|
||||
index = os.path.join(os.getcwd(), 'index.html')
|
||||
# This is necessary as pdftohtml doesn't always (linux) respect absolute paths
|
||||
pdf_path = os.path.abspath(pdf_path)
|
||||
cmd = (PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-i', '-q', pdf_path, os.path.basename(index))
|
||||
cwd = os.getcwd()
|
||||
cmd = [PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q', pdf_path, os.path.basename(index)]
|
||||
if no_images:
|
||||
cmd.append('-i')
|
||||
|
||||
with CurrentDir(tdir):
|
||||
try:
|
||||
p = popen(cmd, stderr=subprocess.PIPE)
|
||||
except OSError, err:
|
||||
if err.errno == 2:
|
||||
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'))
|
||||
else:
|
||||
raise
|
||||
|
||||
while True:
|
||||
try:
|
||||
p = popen(cmd, stderr=subprocess.PIPE)
|
||||
except OSError, err:
|
||||
if err.errno == 2:
|
||||
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'))
|
||||
ret = p.wait()
|
||||
break
|
||||
except OSError, e:
|
||||
if e.errno == errno.EINTR:
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
|
||||
while True:
|
||||
try:
|
||||
ret = p.wait()
|
||||
break
|
||||
except OSError, e:
|
||||
if e.errno == errno.EINTR:
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
if ret != 0:
|
||||
err = p.stderr.read()
|
||||
raise ConversionError(err)
|
||||
if not os.path.exists(index) or os.stat(index).st_size < 100:
|
||||
raise DRMError()
|
||||
|
||||
if ret != 0:
|
||||
err = p.stderr.read()
|
||||
raise ConversionError(err)
|
||||
if not os.path.exists(index) or os.stat(index).st_size < 100:
|
||||
raise DRMError()
|
||||
|
||||
with open(index, 'rb') as i:
|
||||
raw = i.read()
|
||||
if not '<br' in raw[:4000]:
|
||||
raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'))
|
||||
|
||||
return '<!-- created by calibre\'s pdftohtml -->\n' + raw
|
||||
with open(index, 'rb+wb') as i:
|
||||
raw = i.read()
|
||||
raw = '<!-- created by calibre\'s pdftohtml -->\n' + raw
|
||||
i.seek(0)
|
||||
i.truncate()
|
||||
i.write(raw)
|
||||
|
@ -227,6 +227,7 @@ class LocationModel(QAbstractListModel):
|
||||
self.free[1] = fs[1] if fs[1] is not None and cpa is not None else -1
|
||||
self.free[2] = fs[2] if fs[2] is not None and cpb is not None else -1
|
||||
self.reset()
|
||||
self.emit(SIGNAL('devicesChanged()'))
|
||||
|
||||
def location_changed(self, row):
|
||||
self.highlight_row = row
|
||||
@ -253,6 +254,7 @@ class LocationView(QListView):
|
||||
self.connect(self, SIGNAL('entered(QModelIndex)'), self.item_entered)
|
||||
self.connect(self, SIGNAL('viewportEntered()'), self.viewport_entered)
|
||||
self.connect(self.eject_button, SIGNAL('clicked()'), lambda: self.emit(SIGNAL('umount_device()')))
|
||||
self.connect(self.model(), SIGNAL('devicesChanged()'), self.eject_button.hide)
|
||||
|
||||
def count_changed(self, new_count):
|
||||
self.model().count = new_count
|
||||
|
Loading…
x
Reference in New Issue
Block a user