KG updates, including patch to catalog generator for thumb.zip problem

This commit is contained in:
GRiker 2011-06-20 04:08:56 -06:00
commit 5f84407b6e
16 changed files with 170 additions and 73 deletions

View File

@ -1,3 +1,4 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
@ -10,6 +11,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
remove_empty_feeds = True
remove_javascript = True
preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
language = 'en_GB'

View File

@ -26,6 +26,7 @@ class Perfil(BasicNewsRecipe):
.foto1 h1{font-size: x-small}
h1{font-family: Georgia,"Times New Roman",serif}
img{margin-bottom: 0.4em}
.hora{font-size: x-small; color: red}
"""
conversion_options = {
@ -60,7 +61,26 @@ class Perfil(BasicNewsRecipe):
,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml' )
]
def get_article_url(self, article):
    '''Return the feed entry's ``guid`` value as the article URL.

    Yields None when the entry carries no ``guid`` key.
    '''
    guid = article.get('guid', None)
    return guid
def preprocess_html(self, soup):
    '''Simplify the parsed article markup and return it.

    Three passes are made over ``soup``, all modifying it in place:

    1. every inline ``style`` attribute is deleted;
    2. every ``<a>`` is flattened: a link with a single string child is
       replaced by that string, a link containing an ``<img>`` is turned
       into an attribute-less ``<div>``, and any other link is replaced
       by its text as produced by ``self.tag_to_string``;
    3. every ``<img>`` lacking an ``alt`` attribute gets ``alt="image"``.
    '''
    for styled in soup.findAll(style=True):
        del styled['style']

    for link in soup.findAll('a'):
        image = link.find('img')
        text = link.string
        if text is not None:
            # Plain text link: keep only the text.
            link.replaceWith(text)
        elif image:
            # Link wrapping an image: keep the content, drop the hyperlink.
            link.name = 'div'
            link.attrs = []
        else:
            link.replaceWith(self.tag_to_string(link))

    for picture in soup.findAll('img'):
        if not picture.has_key('alt'):
            picture['alt'] = 'image'

    return soup

View File

@ -51,7 +51,7 @@ class WallStreetJournal(BasicNewsRecipe):
br['password'] = self.password
res = br.submit()
raw = res.read()
if 'Welcome,' not in raw:
if 'Welcome,' not in raw and '>Logout<' not in raw:
raise ValueError('Failed to log in to wsj.com, check your '
'username and password')
return br

View File

@ -61,7 +61,7 @@ class LIBREAIR(N516):
BCD = [0x399]
VENDOR_NAME = 'ALURATEK'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '_FILE-STOR_GADGET'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
EBOOK_DIR_MAIN = 'Books'
class ALEX(N516):

View File

@ -457,7 +457,7 @@ class HTMLInput(InputFormatPlugin):
href=bhref)
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME
if 'text' in media_type:
if media_type == 'text/plain':
self.log.warn('Ignoring link to text file %r'%link_)
return None

View File

@ -1055,6 +1055,12 @@ class Manifest(object):
and len(a) == 0 and not a.text:
remove_elem(a)
# Convert <br>s with content into paragraphs as ADE can't handle
# them
for br in xpath(data, '//h:br'):
if len(br) > 0 or br.text:
br.tag = XHTML('div')
return data
def _parse_txt(self, data):
@ -1156,7 +1162,7 @@ class Manifest(object):
data = self._parse_xml(data)
elif self.media_type.lower() in OEB_STYLES:
data = self._parse_css(data)
elif 'text' in self.media_type.lower():
elif self.media_type.lower() == 'text/plain':
self.oeb.log.warn('%s contains data in TXT format'%self.href,
'converting to HTML')
data = self._parse_txt(data)

View File

@ -119,6 +119,7 @@ class DeviceManager(Thread): # {{{
self.sleep_time = sleep_time
self.connected_slot = connected_slot
self.jobs = Queue.Queue(0)
self.job_steps = Queue.Queue(0)
self.keep_going = True
self.job_manager = job_manager
self.reported_errors = set([])
@ -235,6 +236,12 @@ class DeviceManager(Thread): # {{{
self.connected_device.unmount_device()
def next(self):
if not self.job_steps.empty():
try:
return self.job_steps.get_nowait()
except Queue.Empty:
pass
if not self.jobs.empty():
try:
return self.jobs.get_nowait()
@ -271,13 +278,20 @@ class DeviceManager(Thread): # {{{
break
time.sleep(self.sleep_time)
def create_job(self, func, done, description, args=[], kwargs={}):
def create_job_step(self, func, done, description, to_job, args=None, kwargs=None):
    '''Create a DeviceJob for ``func``, register it and queue it.

    :param func: callable the job will run
    :param done: completion callback handed to the DeviceJob (may be None)
    :param description: human readable job description
    :param to_job: job this one should run as a step of, or None
    :param args: positional arguments for ``func`` (defaults to a new list)
    :param kwargs: keyword arguments for ``func`` (defaults to a new dict)
    :return: the newly created DeviceJob

    The job is placed on ``self.job_steps`` only when ``done`` is None or
    a FunctionDispatcher and ``to_job`` equals ``self.current_job``; in
    every other case it goes on the ordinary ``self.jobs`` queue.
    '''
    # Build fresh containers per call. Mutable default arguments are
    # created once and would be shared by every job created without
    # explicit args/kwargs.
    args = [] if args is None else args
    kwargs = {} if kwargs is None else kwargs

    job = DeviceJob(func, done, self.job_manager,
            args=args, kwargs=kwargs, description=description)
    self.job_manager.add_job(job)

    callback_allows_step = done is None or isinstance(done, FunctionDispatcher)
    belongs_to_current = to_job is not None and to_job == self.current_job
    if callback_allows_step and belongs_to_current:
        self.job_steps.put(job)
    else:
        self.jobs.put(job)
    return job
def create_job(self, func, done, description, args=None, kwargs=None):
    '''Queue ``func`` as a job that is not a step of any other job.

    Thin wrapper around ``create_job_step`` with ``to_job`` set to None.

    :param func: callable the job will run
    :param done: completion callback for the job (may be None)
    :param description: human readable job description
    :param args: positional arguments for ``func`` (defaults to a new list)
    :param kwargs: keyword arguments for ``func`` (defaults to a new dict)
    :return: whatever ``create_job_step`` returns
    '''
    # Fresh containers per call instead of shared mutable defaults.
    args = [] if args is None else args
    kwargs = {} if kwargs is None else kwargs
    return self.create_job_step(func, done, description, None, args, kwargs)
def has_card(self):
try:
return bool(self.device.card_prefix())
@ -295,10 +309,10 @@ class DeviceManager(Thread): # {{{
self._device_information = {'info': info, 'prefixes': cp, 'freespace': fs}
return info, cp, fs
def get_device_information(self, done):
def get_device_information(self, done, add_as_step_to_job=None):
'''Get device information and free space on device'''
return self.create_job(self._get_device_information, done,
description=_('Get device information'))
return self.create_job_step(self._get_device_information, done,
description=_('Get device information'), to_job=add_as_step_to_job)
def get_current_device_information(self):
return self._device_information
@ -310,36 +324,38 @@ class DeviceManager(Thread): # {{{
cardblist = self.device.books(oncard='cardb')
return (mainlist, cardalist, cardblist)
def books(self, done):
def books(self, done, add_as_step_to_job=None):
'''Return callable that returns the list of books on device as two booklists'''
return self.create_job(self._books, done, description=_('Get list of books on device'))
return self.create_job_step(self._books, done,
description=_('Get list of books on device'), to_job=add_as_step_to_job)
def _annotations(self, path_map):
return self.device.get_annotations(path_map)
def annotations(self, done, path_map):
def annotations(self, done, path_map, add_as_step_to_job=None):
'''Return mapping of ids to annotations. Each annotation is of the
form (type, location_info, content). path_map is a mapping of
ids to paths on the device.'''
return self.create_job(self._annotations, done, args=[path_map],
description=_('Get annotations from device'))
return self.create_job_step(self._annotations, done, args=[path_map],
description=_('Get annotations from device'), to_job=add_as_step_to_job)
def _sync_booklists(self, booklists):
'''Sync metadata to device'''
self.device.sync_booklists(booklists, end_session=False)
return self.device.card_prefix(end_session=False), self.device.free_space()
def sync_booklists(self, done, booklists, plugboards):
def sync_booklists(self, done, booklists, plugboards, add_as_step_to_job=None):
if hasattr(self.connected_device, 'set_plugboards') and \
callable(self.connected_device.set_plugboards):
self.connected_device.set_plugboards(plugboards, find_plugboard)
return self.create_job(self._sync_booklists, done, args=[booklists],
description=_('Send metadata to device'))
return self.create_job_step(self._sync_booklists, done, args=[booklists],
description=_('Send metadata to device'), to_job=add_as_step_to_job)
def upload_collections(self, done, booklist, on_card):
return self.create_job(booklist.rebuild_collections, done,
def upload_collections(self, done, booklist, on_card, add_as_step_to_job=None):
return self.create_job_step(booklist.rebuild_collections, done,
args=[booklist, on_card],
description=_('Send collections to device'))
description=_('Send collections to device'),
to_job=add_as_step_to_job)
def _upload_books(self, files, names, on_card=None, metadata=None, plugboards=None):
'''Upload books to device: '''
@ -374,11 +390,12 @@ class DeviceManager(Thread): # {{{
metadata=metadata, end_session=False)
def upload_books(self, done, files, names, on_card=None, titles=None,
metadata=None, plugboards=None):
metadata=None, plugboards=None, add_as_step_to_job=None):
desc = _('Upload %d books to device')%len(names)
if titles:
desc += u':' + u', '.join(titles)
return self.create_job(self._upload_books, done, args=[files, names],
return self.create_job_step(self._upload_books, done, to_job=add_as_step_to_job,
args=[files, names],
kwargs={'on_card':on_card,'metadata':metadata,'plugboards':plugboards}, description=desc)
def add_books_to_metadata(self, locations, metadata, booklists):
@ -388,9 +405,10 @@ class DeviceManager(Thread): # {{{
'''Remove books from device'''
self.device.delete_books(paths, end_session=True)
def delete_books(self, done, paths):
return self.create_job(self._delete_books, done, args=[paths],
description=_('Delete books from device'))
def delete_books(self, done, paths, add_as_step_to_job=None):
return self.create_job_step(self._delete_books, done, args=[paths],
description=_('Delete books from device'),
to_job=add_as_step_to_job)
def remove_books_from_metadata(self, paths, booklists):
self.device.remove_books_from_metadata(paths, booklists)
@ -405,9 +423,10 @@ class DeviceManager(Thread): # {{{
self.device.get_file(path, f)
f.close()
def save_books(self, done, paths, target):
return self.create_job(self._save_books, done, args=[paths, target],
description=_('Download books from device'))
def save_books(self, done, paths, target, add_as_step_to_job=None):
return self.create_job_step(self._save_books, done, args=[paths, target],
description=_('Download books from device'),
to_job=add_as_step_to_job)
def _view_book(self, path, target):
f = open(target, 'wb')
@ -415,9 +434,9 @@ class DeviceManager(Thread): # {{{
f.close()
return target
def view_book(self, done, path, target):
return self.create_job(self._view_book, done, args=[path, target],
description=_('View book on device'))
def view_book(self, done, path, target, add_as_step_to_job=None):
return self.create_job_step(self._view_book, done, args=[path, target],
description=_('View book on device'), to_job=add_as_step_to_job)
def set_current_library_uuid(self, uuid):
self.current_library_uuid = uuid
@ -778,7 +797,8 @@ class DeviceMixin(object): # {{{
self.device_manager.device.icon)
self.bars_manager.update_bars()
self.status_bar.device_connected(info[0])
self.device_manager.books(FunctionDispatcher(self.metadata_downloaded))
self.device_manager.books(FunctionDispatcher(self.metadata_downloaded),
add_as_step_to_job=job)
def metadata_downloaded(self, job):
'''
@ -788,7 +808,7 @@ class DeviceMixin(object): # {{{
self.device_job_exception(job)
return
# set_books_in_library might schedule a sync_booklists job
self.set_books_in_library(job.result, reset=True)
self.set_books_in_library(job.result, reset=True, add_as_step_to_job=job)
mainlist, cardalist, cardblist = job.result
self.memory_view.set_database(mainlist)
self.memory_view.set_editable(self.device_manager.device.CAN_SET_METADATA,
@ -843,8 +863,8 @@ class DeviceMixin(object): # {{{
# set_books_in_library even though books were not added because
# the deleted book might have been an exact match. Upload the booklists
# if set_books_in_library did not.
if not self.set_books_in_library(self.booklists(), reset=True):
self.upload_booklists()
if not self.set_books_in_library(self.booklists(), reset=True, add_as_step_to_job=job):
self.upload_booklists(job)
self.book_on_device(None, reset=True)
# We need to reset the ondevice flags in the library. Use a big hammer,
# so we don't need to worry about whether some succeeded or not.
@ -1193,13 +1213,14 @@ class DeviceMixin(object): # {{{
self.device_manager.sync_booklists(Dispatcher(lambda x: x),
self.booklists(), plugboards)
def upload_booklists(self):
def upload_booklists(self, add_as_step_to_job=None):
'''
Upload metadata to device.
'''
plugboards = self.library_view.model().db.prefs.get('plugboards', {})
self.device_manager.sync_booklists(FunctionDispatcher(self.metadata_synced),
self.booklists(), plugboards)
self.booklists(), plugboards,
add_as_step_to_job=add_as_step_to_job)
def metadata_synced(self, job):
'''
@ -1274,8 +1295,8 @@ class DeviceMixin(object): # {{{
# because the UUID changed. Force both the device and the library view
# to refresh the flags. Set_books_in_library could upload the booklists.
# If it does not, then do it here.
if not self.set_books_in_library(self.booklists(), reset=True):
self.upload_booklists()
if not self.set_books_in_library(self.booklists(), reset=True, add_as_step_to_job=job):
self.upload_booklists(job)
with self.library_view.preserve_selected_books:
self.book_on_device(None, reset=True)
self.refresh_ondevice()
@ -1335,7 +1356,7 @@ class DeviceMixin(object): # {{{
loc[4] |= self.book_db_uuid_path_map[id]
return loc
def set_books_in_library(self, booklists, reset=False):
def set_books_in_library(self, booklists, reset=False, add_as_step_to_job=None):
'''
Set the ondevice indications in the device database.
This method should be called before book_on_device is called, because
@ -1487,7 +1508,7 @@ class DeviceMixin(object): # {{{
plugboards = self.library_view.model().db.prefs.get('plugboards', {})
self.device_manager.sync_booklists(
FunctionDispatcher(self.metadata_synced), booklists,
plugboards)
plugboards, add_as_step_to_job)
return update_metadata
# }}}

View File

@ -432,6 +432,10 @@ class JobsDialog(QDialog, Ui_JobsDialog):
self.jobs_view.horizontalHeader().restoreState(QByteArray(state))
except:
pass
idx = self.jobs_view.model().index(0, 0)
if idx.isValid():
sm = self.jobs_view.selectionModel()
sm.select(idx, sm.ClearAndSelect|sm.Rows)
def save_state(self):
try:

View File

@ -727,6 +727,15 @@ class TagTreeItem(object): # {{{
else:
self.tag.state = set_to
def all_children(self):
    '''Return every descendant of this node as a flat list.

    Nodes appear in depth-first, pre-order sequence: each child is
    followed immediately by its own descendants. The node itself is not
    included.
    '''
    def descendants(node):
        found = []
        for child in node.children:
            found.append(child)
            found.extend(descendants(child))
        return found

    return descendants(self)
def child_tags(self):
res = []
def recurse(nodes, res):
@ -1269,6 +1278,7 @@ class TagsModel(QAbstractItemModel): # {{{
category_icon = category_node.icon,
category_key=category_node.category_key,
icon_map=self.icon_state_map)
sub_cat.tag.is_searchable = False
self.endInsertRows()
else: # by 'first letter'
cl = cl_list[idx]
@ -1644,14 +1654,23 @@ class TagsModel(QAbstractItemModel): # {{{
if node.tag.state:
if node.category_key == "news":
if node_searches[node.tag.state] == 'true':
ans.append('tags:=news')
ans.append('tags:"=' + _('News') + '"')
else:
ans.append('( not tags:=news )')
ans.append('( not tags:"=' + _('News') + '")')
else:
ans.append('%s:%s'%(node.category_key, node_searches[node.tag.state]))
key = node.category_key
for tag_item in node.child_tags():
for tag_item in node.all_children():
if tag_item.type == TagTreeItem.CATEGORY:
if self.collapse_model == 'first letter' and \
tag_item.temporary and not key.startswith('@') \
and tag_item.tag.state:
if node_searches[tag_item.tag.state] == 'true':
ans.append('%s:~^%s'%(key, tag_item.py_name))
else:
ans.append('(not %s:~^%s )'%(key, tag_item.py_name))
continue
tag = tag_item.tag
if tag.state != TAG_SEARCH_STATES['clear']:
if tag.state == TAG_SEARCH_STATES['mark_minus'] or \

View File

@ -179,7 +179,7 @@ class UpdateMixin(object):
def plugin_update_found(self, number_of_updates):
# Change the plugin icon to indicate there are updates available
plugin = self.iactions.get('Plugin Updates', None)
plugin = self.iactions.get('Plugin Updater', None)
if not plugin:
return
if number_of_updates:

View File

@ -145,7 +145,7 @@ def _match(query, value, matchkind):
return True
elif query == t:
return True
elif ((matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
elif ((matchkind == REGEXP_MATCH and re.search(query, t, re.I|re.UNICODE)) or ### search unanchored
(matchkind == CONTAINS_MATCH and query in t)):
return True
except re.error:

View File

@ -3,7 +3,7 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Greg Riker'
import codecs, datetime, htmlentitydefs, os, re, shutil, time, zlib
import codecs, datetime, htmlentitydefs, os, re, shutil, zlib
from collections import namedtuple
from copy import deepcopy
from xml.sax.saxutils import escape
@ -25,7 +25,7 @@ from calibre.utils.html2text import html2text
from calibre.utils.icu import capitalize
from calibre.utils.logging import default_log as log
from calibre.utils.magick.draw import thumbnail
from calibre.utils.zipfile import ZipFile, ZipInfo
from calibre.utils.zipfile import ZipFile
FIELDS = ['all', 'title', 'title_sort', 'author_sort', 'authors', 'comments',
'cover', 'formats','id', 'isbn', 'ondevice', 'pubdate', 'publisher',
@ -4704,24 +4704,33 @@ Author '{0}':
to be replaced.
'''
def open_archive(mode='r'):
try:
return ZipFile(self.__archive_path, mode=mode)
except:
# Happens on windows if the file is opened by another
# process
pass
# Generate crc for current cover
#self.opts.log.info(" generateThumbnail():")
data = open(title['cover'], 'rb').read()
with open(title['cover'], 'rb') as f:
data = f.read()
cover_crc = hex(zlib.crc32(data))
# Test cache for uuid
with ZipFile(self.__archive_path, mode='r') as zfr:
zf = open_archive()
if zf is not None:
with zf:
try:
t_info = zfr.getinfo(title['uuid'])
zf.getinfo(title['uuid']+cover_crc)
except:
pass
else:
if t_info.comment == cover_crc:
# uuid found in cache with matching crc
thumb_data = zfr.read(title['uuid'])
zfr.extract(title['uuid'],image_dir)
os.rename(os.path.join(image_dir,title['uuid']),
os.path.join(image_dir,thumb_file))
thumb_data = zf.read(title['uuid'])
with open(os.path.join(image_dir, thumb_file), 'wb') as f:
f.write(thumb_data)
return
@ -4732,10 +4741,13 @@ Author '{0}':
f.write(thumb_data)
# Save thumb to archive
t_info = ZipInfo(title['uuid'],time.localtime()[0:6])
t_info.comment = cover_crc
with ZipFile(self.__archive_path, mode='a') as zfw:
zfw.writestr(t_info, thumb_data)
if zf is not None: # Ensure that the read succeeded
# If we failed to open the zip file for reading,
# we dont know if it contained the thumb or not
zf = open_archive('a')
if zf is not None:
with zf:
zf.writestr(title['uuid']+cover_crc, thumb_data)
def getFriendlyGenreTag(self, genre):
# Find the first instance of friendly_tag matching genre

View File

@ -24,6 +24,7 @@ NON_EBOOK_EXTENSIONS = frozenset([
class RestoreDatabase(LibraryDatabase2):
PATH_LIMIT = 10
WINDOWS_LIBRARY_PATH_LIMIT = 180
def set_path(self, *args, **kwargs):
    '''Do nothing: accepts any arguments and ignores them.'''
    # NOTE(review): presumably overrides the LibraryDatabase2 method of
    # the same name to disable it -- confirm against the base class.
    return None

View File

@ -657,6 +657,7 @@ Some limitations of PDF input are:
* Some PDFs store their images upside down with a rotation instruction, |app| currently doesn't support that instruction, so the images will be rotated in the output as well.
* Links and Tables of Contents are not supported
* PDFs that use embedded non-unicode fonts to represent non-English characters will result in garbled output for those characters
* Some PDFs are made up of photographs of the page with OCRed text behind them. In such cases |app| uses the OCRed text, which can be very different from what you see when you view the PDF file.
To reiterate, **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an
output ranging anywhere from decent to unusable, depending on the input PDF.

View File

@ -28,7 +28,7 @@ For example, adding support for a new device to |app| typically involves writing
a device driver plugin. You can browse the
`built-in drivers <http://bazaar.launchpad.net/%7Ekovid/calibre/trunk/files/head%3A/src/calibre/devices/>`_. Similarly, adding support
for new conversion formats involves writing input/output format plugins. Another example of the modular design is the :ref:`recipe system <news>` for
fetching news.
fetching news. For more examples of plugins designed to add features to |app|, see the `plugin index <http://www.mobileread.com/forums/showthread.php?t=118764>`_.
Code layout
^^^^^^^^^^^^^^
@ -36,10 +36,21 @@ Code layout
All the |app| python code is in the ``calibre`` package. This package contains the following main sub-packages
* devices - All the device drivers. Just look through some of the built-in drivers to get an idea for how they work.
* ebooks - All the ebook conversion code. A good starting point is ``calibre.ebooks.conversion.cli`` which is the
module powering the :command:`ebook-convert` command.
* library - The database backed and the content server.
* gui2 - The Graphical User Interface.
* For details, see: devices.interface which defines the interface supported by device drivers and devices.usbms which
defines a generic driver that connects to a USBMS device. All USBMS based drivers in calibre inherit from it.
* ebooks - All the ebook conversion/metadata code. A good starting point is ``calibre.ebooks.conversion.cli`` which is the
module powering the :command:`ebook-convert` command. The conversion process is controlled via conversion.plumber.
The format independent code is all in ebooks.oeb and the format dependent stuff is in ebooks.format_name.
* Metadata reading, writing, and downloading is all in ebooks.metadata
* library - The database backend and the content server. See library.database2 for the interface to the calibre library. library.server is the calibre Content Server.
* gui2 - The Graphical User Interface. GUI initialization happens in gui2.main and gui2.ui. The ebook-viewer is in gui2.viewer.
If you need help understanding the code, post in the `development forum <http://www.mobileread.com/forums/forumdisplay.php?f=240>`_
and you will most likely get help from one of |app|'s many developers.
Getting the code
------------------
@ -82,9 +93,9 @@ Now whenever you commit changes to your branch with the command::
bzr commit -m "Comment describing your change"
I can merge it directly from you branch into the main |app| source tree. You should also subscribe to the |app|
developers mailing list `calibre-devs <https://launchpad.net/~calibre-devs>`_. Before making major changes, you should
discuss them on the mailing list or the #calibre IRC channel on Freenode to ensure that the changes will be accepted once you're done.
I can merge it directly from your branch into the main |app| source tree. You should also keep an eye on the |app|
`development forum <http://www.mobileread.com/forums/forumdisplay.php?f=240>`_. Before making major changes, you should
discuss them in the forum or contact Kovid directly (his email address is all over the source code).
Windows development environment
---------------------------------

View File

@ -131,7 +131,7 @@ Follow these steps to find the problem:
* Make sure that you are connecting only a single device to your computer at a time. Do not have another |app| supported device like an iPhone/iPad etc. at the same time.
* If you are connecting an Apple iDevice (iPad, iPod Touch, iPhone), use the 'Connect to iTunes' method in the 'Getting started' instructions in `Calibre + Apple iDevices: Start here <http://www.mobileread.com/forums/showthread.php?t=118559>`_.
* Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website <http://calibre-ebook.com/download>`_.
* Ensure your operating system is seeing the device. That is, the device should be mounted as a disk that you can access using Windows explorer or whatever the file management program on your computer is.
* Ensure your operating system is seeing the device. That is, the device should be mounted as a disk that you can access using Windows explorer or whatever the file management program on your computer is. On Windows your device **must have been assigned a drive letter**, like K:.
* In calibre, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled, the plugin icon next to it should be green when it is enabled.
* If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker <http://bugs.calibre-ebook.com>`_.