Merge from trunk

Commit 4c3473d82c by Charles Haley, 2011-06-27 17:22:51 +01:00
14 changed files with 309 additions and 3220 deletions

View File

@ -2,90 +2,92 @@
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Wprost(BasicNewsRecipe):
    EDITION = 0
    FIND_LAST_FULL_ISSUE = True
    EXCLUDE_LOCKED = True
    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
    title = u'Wprost'
    __author__ = 'matek09'
    description = 'Weekly magazine'
    encoding = 'ISO-8859-2'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
    remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
    '''keep_only_tags =[]
    keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''
    preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
        (re.compile(r'display: block;'), lambda match: ''),
        (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
        (re.compile(r'\<tr>'), lambda match: ''),
        (re.compile(r'\<td .*?\>'), lambda match: '')]
    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
    extra_css = '''
        .div-header {font-size: x-small; font-weight: bold}
        '''
    #h2 {font-size: x-large; font-weight: bold}
    def is_blocked(self, a):
        if a.findNextSibling('img') is None:
            return False
        else:
            return True
    def find_last_issue(self):
        soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
        a = 0
        if self.FIND_LAST_FULL_ISSUE:
            ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
            a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
        else:
            a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
        self.EDITION = a['href'].replace('/tygodnik/?I=', '')
        self.cover_url = a.img['src']
    def parse_index(self):
        self.find_last_issue()
        soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
        feeds = []
        for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
            articles = list(self.find_articles(main_block))
            if len(articles) > 0:
                section = self.tag_to_string(main_block)
                feeds.append((section, articles))
        return feeds

    def find_articles(self, main_block):
        for a in main_block.findAllNext(attrs={'style':['','padding-top: 15px;']}):
            if a.name in "td":
                break
            if self.EXCLUDE_LOCKED & self.is_blocked(a):
                continue
            yield {
                'title' : self.tag_to_string(a),
                'url' : 'http://www.wprost.pl' + a['href'],
                'date' : '',
                'description' : ''
                }
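For context: calibre drives a recipe like this through parse_index(), which must return a list of (section_title, articles) tuples whose articles are dicts with title, url, date and description keys — exactly the shape find_articles() yields above. A minimal sketch of that structure, with placeholder values rather than real Wprost data:

    def parse_index(self):
        # Hypothetical hard-coded example of the structure calibre expects
        return [
            ('Example section', [
                {'title': 'Example article',
                 'url': 'http://www.wprost.pl/ar/000000/',
                 'date': '',
                 'description': ''},
            ]),
        ]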

View File

@ -5,7 +5,7 @@ __copyright__ = '2010, Gregory Riker'
__docformat__ = 'restructuredtext en'
import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time
from calibre.constants import __appname__, __version__, DEBUG
from calibre import fit_image, confirm_config_name
from calibre.constants import isosx, iswindows
@ -119,11 +119,17 @@ class DriverBase(DeviceConfig, DevicePlugin):
'iBooks Category'),
_('Cache covers from iTunes/iBooks') +
':::' +
_('Enable to cache and display covers from iTunes/iBooks'),
_("'Copy files to iTunes Media folder" u"\u2026" "' is enabled in iTunes Preferences|Advanced") +
':::' +
_("<p>This setting should match your iTunes <i>Preferences</i>|<i>Advanced</i> setting.</p>"
"<p>Disabling will store copies of books transferred to iTunes in your calibre configuration directory.</p>"
"<p>Enabling indicates that iTunes is configured to store copies in your iTunes Media folder.</p>")
]
EXTRA_CUSTOMIZATION_DEFAULT = [
True,
True,
False,
]
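For orientation: EXTRA_CUSTOMIZATION_MESSAGE and EXTRA_CUSTOMIZATION_DEFAULT are parallel lists — each message describes one widget on the driver's configuration page and the default at the same index seeds it, a boolean rendering as a checkbox. The driver reads the saved value back by index, which is what the new USE_ITUNES_STORAGE = 2 constant below supports. A small sketch under those assumptions (the method name is illustrative):

    # Index into the parallel EXTRA_CUSTOMIZATION_* lists, as in the diff below
    USE_ITUNES_STORAGE = 2

    def copy_to_itunes_media(self):
        # settings().extra_customization holds the user's saved values,
        # seeded from EXTRA_CUSTOMIZATION_DEFAULT
        return self.settings().extra_customization[self.USE_ITUNES_STORAGE]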
@ -193,6 +199,7 @@ class ITUNES(DriverBase):
# EXTRA_CUSTOMIZATION_MESSAGE indexes
USE_SERIES_AS_CATEGORY = 0
CACHE_COVERS = 1
USE_ITUNES_STORAGE = 2
OPEN_FEEDBACK_MESSAGE = _(
'Apple device detected, launching iTunes, please wait ...')
@ -281,6 +288,7 @@ class ITUNES(DriverBase):
description_prefix = "added by calibre"
ejected = False
iTunes= None
iTunes_local_storage = None
library_orphans = None
log = Log()
manual_sync_mode = False
@ -825,7 +833,7 @@ class ITUNES(DriverBase):
# Confirm/create thumbs archive
if not os.path.exists(self.cache_dir):
if DEBUG:
self.log.info(" creating thumb cache '%s'" % self.cache_dir)
self.log.info(" creating thumb cache at '%s'" % self.cache_dir)
os.makedirs(self.cache_dir)
if not os.path.exists(self.archive_path):
@ -837,6 +845,17 @@ class ITUNES(DriverBase):
if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If enabled in config options, create/confirm an iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def remove_books_from_metadata(self, paths, booklists):
'''
Remove books from the metadata list. This function must not communicate
@ -1281,50 +1300,27 @@ class ITUNES(DriverBase):
if DEBUG:
self.log.info(" ITUNES._add_new_copy()")
db_added = None
lb_added = None
# If using iTunes_local_storage, copy the file, redirect iTunes to use local copy
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
local_copy = os.path.join(self.iTunes_local_storage, str(metadata.uuid) + os.path.splitext(fpath)[1])
shutil.copyfile(fpath,local_copy)
fpath = local_copy
if self.manual_sync_mode:
'''
Unsupported direct-connect mode.
'''
db_added = self._add_device_book(fpath, metadata)
lb_added = self._add_library_book(fpath, metadata)
if not lb_added and DEBUG:
self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title)
else:
lb_added = self._add_library_book(fpath, metadata)
if not lb_added:
raise UserFeedback("iTunes Media folder inaccessible",
details="Failed to add '%s' to iTunes" % metadata.title,
level=UserFeedback.WARN)
@ -1520,7 +1516,7 @@ class ITUNES(DriverBase):
else:
self.log.error(" book_playlist not found")
if dev_books is not None and len(dev_books):
first_book = dev_books[0]
if False:
self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist()))
@ -1551,7 +1547,7 @@ class ITUNES(DriverBase):
dev_books = pl.Tracks
break
if dev_books is not None and dev_books.Count:
first_book = dev_books.Item(1)
#if DEBUG:
#self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist))
@ -2526,7 +2522,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else:
# Delete from iTunes Media folder
if os.path.exists(fp):
os.remove(fp)
if DEBUG:
@ -2544,12 +2548,6 @@ class ITUNES(DriverBase):
os.rmdir(author_storage_path)
if DEBUG:
self.log.info(" removing empty author directory")
else:
self.log.info(" '%s' does not exist at storage location" % cached_book['title'])
@ -2586,7 +2584,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else:
# Delete from iTunes Media folder
if os.path.exists(fp):
os.remove(fp)
if DEBUG:
@ -3234,6 +3240,17 @@ class ITUNES_ASYNC(ITUNES):
if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If enabled in config options, create/confirm an iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def sync_booklists(self, booklists, end_session=True):
'''
Update metadata on device.

View File

@ -8,14 +8,19 @@ __docformat__ = 'restructuredtext en'
import mimetypes
import urllib
from contextlib import closing
from lxml import etree
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
from calibre.utils.opensearch.description import Description
from calibre.utils.opensearch.query import Query
class OpenSearchStore(StorePlugin):
@ -38,38 +43,59 @@ class OpenSearchStore(StorePlugin):
if not hasattr(self, 'open_search_url'):
return
description = Description(self.open_search_url)
url_template = description.get_best_template()
if not url_template:
return
oquery = Query(url_template)
# set up initial values
oquery.searchTerms = urllib.quote_plus(query)
oquery.count = max_results
url = oquery.url()
counter = max_results
br = browser()
with closing(br.open(url, timeout=timeout)) as f:
doc = etree.fromstring(f.read())
for data in doc.xpath('//*[local-name() = "entry"]'):
if counter <= 0:
break
counter -= 1
s = SearchResult()
s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()
for link in data.xpath('./*[local-name() = "link"]'):
rel = link.get('rel')
href = link.get('href')
type = link.get('type')
if rel and href and type:
if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
s.cover_url = href
elif rel == u'http://opds-spec.org/acquisition/buy':
s.detail_item = href
elif rel == u'http://opds-spec.org/acquisition':
if type:
ext = mimetypes.guess_extension(type)
if ext:
ext = ext[1:].upper().strip()
s.downloads[ext] = href
s.formats = ', '.join(s.downloads.keys()).strip()
s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()
price_e = data.xpath('.//*[local-name() = "price"][1]')
if price_e:
price_e = price_e[0]
currency_code = price_e.get('currencycode', '')
price = ''.join(price_e.xpath('.//text()')).strip()
s.price = currency_code + ' ' + price
s.price = s.price.strip()
yield s
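To make the XPath handling above concrete, this is roughly the kind of OPDS Atom entry the rewritten search() consumes; the element values here are invented for illustration:

    <entry xmlns="http://www.w3.org/2005/Atom">
      <id>urn:uuid:00000000-example</id>
      <title>Example Title</title>
      <author><name>Example Author</name></author>
      <link rel="http://opds-spec.org/image/thumbnail" type="image/jpeg" href="http://example.com/thumb.jpg"/>
      <link rel="http://opds-spec.org/acquisition/buy" type="text/html" href="http://example.com/buy/1"/>
      <link rel="http://opds-spec.org/acquisition" type="application/epub+zip" href="http://example.com/book.epub"/>
      <price currencycode="USD">1.99</price>
    </entry>

The thumbnail link fills cover_url, the buy link overrides detail_item, and each typed acquisition link becomes a downloads entry keyed by the extension mimetypes.guess_extension() derives from its MIME type.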

View File

@ -24,8 +24,12 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
s.price = '$0.00'
s.drm = SearchResult.DRM_UNLOCKED
yield s
'''
def get_details(self, search_result, timeout):
'''
The opensearch feed only returns a subset of formats that are available.
We want to get a list of all formats that the user can get.
'''
br = browser()
with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
@ -33,4 +37,3 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
search_result.formats = formats.upper()
return True
'''

View File

@ -224,6 +224,7 @@ class TagsModel(QAbstractItemModel): # {{{
self.row_map = []
self.root_item = self.create_node(icon_map=self.icon_state_map)
self.db = None
self._build_in_progress = False
self.reread_collapse_model({}, rebuild=False)
def reread_collapse_model(self, state_map, rebuild=True):
@ -257,9 +258,17 @@ class TagsModel(QAbstractItemModel): # {{{
self.endResetModel()
def rebuild_node_tree(self, state_map={}):
if self._build_in_progress:
print ('Tag Browser build already in progress')
traceback.print_stack()
return
#traceback.print_stack()
#print ()
self._build_in_progress = True
self.beginResetModel()
self._run_rebuild(state_map=state_map)
self.endResetModel()
self._build_in_progress = False
def _run_rebuild(self, state_map={}):
for node in self.node_map.itervalues():

View File

@ -91,10 +91,10 @@ class TagBrowserMixin(object): # {{{
# Add the new category
user_cats[new_cat] = []
db.prefs.set('user_categories', user_cats)
self.tags_view.recount()
m = self.tags_view.model()
idx = m.index_for_path(m.find_category_node('@' + new_cat))
self.tags_view.show_item_at_index(idx)
# Open the editor on the new item to rename it
if new_category_name is None:
self.tags_view.edit(idx)
@ -111,7 +111,7 @@ class TagBrowserMixin(object): # {{{
for k in d.categories:
db.field_metadata.add_user_category('@' + k, k)
db.data.change_search_locations(db.field_metadata.get_search_terms())
self.tags_view.recount()
def do_delete_user_category(self, category_name):
'''
@ -144,7 +144,7 @@ class TagBrowserMixin(object): # {{{
elif k.startswith(category_name + '.'):
del user_cats[k]
db.prefs.set('user_categories', user_cats)
self.tags_view.recount()
def do_del_item_from_user_cat(self, user_cat, item_name, item_category):
'''
@ -413,13 +413,14 @@ class TagBrowserWidget(QWidget): # {{{
txt = unicode(self.item_search.currentText()).strip()
if txt.startswith('*'):
model.filter_categories_by = txt[1:]
self.tags_view.recount()
self.current_find_position = None
return
if model.filter_categories_by:
model.filter_categories_by = None
self.tags_view.recount()
self.current_find_position = None
if not txt:
return
@ -437,8 +438,9 @@ class TagBrowserWidget(QWidget): # {{{
self.current_find_position = \
model.find_item_node(key, txt, self.current_find_position)
if self.current_find_position:
self.tags_view.show_item_at_path(self.current_find_position, box=True)
elif self.item_search.text():
self.not_found_label.setVisible(True)
if self.tags_view.verticalScrollBar().isVisible():

View File

@ -71,7 +71,6 @@ class TagsView(QTreeView): # {{{
search_item_renamed = pyqtSignal()
drag_drop_finished = pyqtSignal(object)
restriction_error = pyqtSignal()
def __init__(self, parent=None):
QTreeView.__init__(self, parent=None)
@ -96,8 +95,6 @@ class TagsView(QTreeView): # {{{
self.user_category_icon = QIcon(I('tb_folder.png'))
self.delete_icon = QIcon(I('list_remove.png'))
self.rename_icon = QIcon(I('edit-undo.png'))
self._model = TagsModel(self)
self._model.search_item_renamed.connect(self.search_item_renamed)
@ -176,7 +173,8 @@ class TagsView(QTreeView): # {{{
state_map = self.get_state()[1]
self.db.prefs.set('user_categories', user_cats)
self._model.rebuild_node_tree(state_map=state_map)
p = self._model.find_category_node('@'+nkey)
self.show_item_at_path(p)
@property
def match_all(self):
@ -501,6 +499,8 @@ class TagsView(QTreeView): # {{{
return
src_is_tb = event.mimeData().hasFormat('application/calibre+from_tag_browser')
item = index.data(Qt.UserRole).toPyObject()
if item.type == TagTreeItem.ROOT:
return
flags = self._model.flags(index)
if item.type == TagTreeItem.TAG and flags & Qt.ItemIsDropEnabled:
self.setDropIndicatorShown(not src_is_tb)
@ -570,7 +570,7 @@ class TagsView(QTreeView): # {{{
def show_item_at_index(self, idx, box=False,
position=QTreeView.PositionAtCenter):
if idx.isValid() and idx.data(Qt.UserRole).toPyObject() is not self._model.root_item:
self.setCurrentIndex(idx)
self.scrollTo(idx, position)
self.setCurrentIndex(idx)

View File

@ -1,6 +0,0 @@
from description import Description
from query import Query
from client import Client
from results import Results
Description, Query, Client, Results

View File

@ -1,39 +0,0 @@
from description import Description
from query import Query
from results import Results
class Client:
"""This is the class you'll probably want to be using. You simply
pass the constructor the url for the service description file and
issue a search and get back results as an iterable Results object.
The neat thing about a Results object is that it will seamlessly
handle fetching more results from the opensearch server when it can...
so you just need to iterate and can let the paging be taken care of
for you.
from opensearch import Client
client = Client(description_url)
results = client.search("computer")
for result in results:
print result.title
"""
def __init__(self, url, agent="python-opensearch <https://github.com/edsu/opensearch>"):
self.agent = agent
self.description = Description(url, self.agent)
def search(self, search_terms, page_size=25):
"""Perform a search and get back a results object
"""
url = self.description.get_best_template()
query = Query(url)
# set up initial values
query.searchTerms = search_terms
query.count = page_size
# run the results
return Results(query, agent=self.agent)

View File

@ -1,71 +1,95 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '''
2011, John Schember <john@nachtimwald.com>,
2006, Ed Summers <ehs@pobox.com>
'''
__docformat__ = 'restructuredtext en'
from contextlib import closing
from lxml import etree
from calibre import browser
from calibre.utils.opensearch.url import URL
class Description(object):
'''
A class for representing OpenSearch Description files.
'''
def __init__(self, url=""):
'''
The constructor which may pass an optional url to load from.
d = Description("http://www.example.com/description")
'''
if url:
self.load(url)
def load(self, url):
"""For loading up a description object from a url. Normally
'''
For loading up a description object from a url. Normally
you'll probably just want to pass a URL into the constructor.
"""
req = Request(url, headers={'User-Agent':self.agent})
self.dom = parse(urlopen(req))
'''
br = browser()
with closing(br.open(url, timeout=15)) as f:
doc = etree.fromstring(f.read())
# version 1.1 has repeating Url elements
self.urls = []
for element in doc.xpath('//*[local-name() = "Url"]'):
template = element.get('template')
type = element.get('type')
if template and type:
url = URL()
url.template = template
url.type = type
self.urls.append(url)
# this is version 1.0 specific
self.url = ''.join(doc.xpath('//*[local-name() = "Url"][1]//text()'))
self.format = ''.join(doc.xpath('//*[local-name() = "Format"][1]//text()'))
self.shortname = ''.join(doc.xpath('//*[local-name() = "ShortName"][1]//text()'))
self.longname = ''.join(doc.xpath('//*[local-name() = "LongName"][1]//text()'))
self.description = ''.join(doc.xpath('//*[local-name() = "Description"][1]//text()'))
self.image = ''.join(doc.xpath('//*[local-name() = "Image"][1]//text()'))
self.sameplesearch = ''.join(doc.xpath('//*[local-name() = "SampleSearch"][1]//text()'))
self.developer = ''.join(doc.xpath('//*[local-name() = "Developer"][1]//text()'))
self.contact = ''.join(doc.xpath('//*[local-name() = "Contact"][1]//text()'))
self.attribution = ''.join(doc.xpath('//*[local-name() = "Attribution"][1]//text()'))
self.syndicationright = ''.join(doc.xpath('//*[local-name() = "SyndicationRight"][1]//text()'))
tag_text = ' '.join(doc.xpath('//*[local-name() = "Tags"]//text()'))
if tag_text != None:
self.tags = tag_text.split(" ")
self.tags = tag_text.split(' ')
self.adultcontent = doc.xpath('boolean(//*[local-name() = "AdultContent" and contains(., "true")])')
def get_url_by_type(self, type):
"""Walks available urls and returns them by type. Only
'''
Walks available urls and returns them by type. Only
appropriate in opensearch v1.1 where there can be multiple
query targets. Returns none if no such type is found.
url = description.get_url_by_type('application/rss+xml')
"""
'''
for url in self.urls:
if url.type == type:
return url
return None
def get_best_template(self):
"""OK, best is a value judgement, but so be it. You'll get
'''
OK, best is a value judgement, but so be it. You'll get
back either the atom, rss or first template available. This
method handles the main difference between opensearch v1.0 and v1.1
"""
'''
# version 1.0
if self.url:
return self.url
@ -88,40 +112,3 @@ class Description:
# out of luck
return None

File diff suppressed because it is too large.

View File

@ -1,10 +1,17 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2006, Ed Summers <ehs@pobox.com>'
__docformat__ = 'restructuredtext en'
from urlparse import urlparse, urlunparse, parse_qs
from urllib import urlencode
class Query(object):
'''
Represents an opensearch query. Really this class is just a
helper for substituting values into the macros in a format.
format = 'http://beta.indeed.com/opensearch?q={searchTerms}&start={startIndex}&limit={count}'
@ -12,16 +19,17 @@ class Query:
q.searchTerms('zx81')
q.startIndex = 1
q.count = 25
print q.url()
'''
standard_macros = ['searchTerms', 'count', 'startIndex', 'startPage',
'language', 'outputEncoding', 'inputEncoding']
def __init__(self, format):
"""Create a query object by passing it the url format obtained
'''
Create a query object by passing it the url format obtained
from the opensearch Description.
"""
'''
self.format = format
# unpack the url to a tuple
@ -37,7 +45,7 @@ class Query:
for key,values in self.query_string.items():
# TODO eventually optional/required params should be
# distinguished somehow (the ones with/without trailing ?
macro = values[0].replace('{', '').replace('}', '').replace('?', '')
if macro in Query.standard_macros:
self.macro_map[macro] = key
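Taken together with the macro_map built above, Query substitutes attribute values into the template's {macros} when url() is called. Roughly, following the docstring's own example (the template URL is illustrative):

    from calibre.utils.opensearch.query import Query

    # searchTerms and count are standard OpenSearch macros
    q = Query('http://example.com/search?q={searchTerms}&limit={count}')
    q.searchTerms = 'dickens'
    q.count = 25
    print q.url()  # roughly: http://example.com/search?q=dickens&limit=25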

View File

@ -1,131 +0,0 @@
class Results(object):
def __init__(self, query, agent=None):
self.agent = agent
self._fetch(query)
self._iter = 0
def __iter__(self):
self._iter = 0
return self
def __len__(self):
return self.totalResults
def next(self):
# just keep going like the energizer bunny
while True:
# return any item we haven't returned
if self._iter < len(self.items):
self._iter += 1
return self.items[self._iter-1]
# if there appears to be more to fetch
if \
self.totalResults != 0 \
and self.totalResults > self.startIndex + self.itemsPerPage - 1:
# get the next query
next_query = self._get_next_query()
# if we got one executed it and go back to the beginning
if next_query:
self._fetch(next_query)
# very important to reset this counter
# or else the return will fail
self._iter = 0
else:
raise StopIteration
def _fetch(self, query):
import osfeedparser
feed = osfeedparser.opensearch_parse(query.url(), agent=self.agent)
self.feed = feed
# general channel stuff
channel = feed['feed']
self.title = _pick(channel,'title')
self.link = _pick(channel,'link')
self.description = _pick(channel,'description')
self.language = _pick(channel,'language')
self.copyright = _pick(channel,'copyright')
# get back opensearch specific values
self.totalResults = _pick(channel,'opensearch_totalresults',0)
self.startIndex = _pick(channel,'opensearch_startindex',1)
self.itemsPerPage = _pick(channel,'opensearch_itemsperpage',0)
# alias items from the feed to our results object
self.items = feed['items']
# set default values if necessary
if self.startIndex == 0:
self.startIndex = 1
if self.itemsPerPage == 0 and len(self.items) > 0:
self.itemsPerPage = len(self.items)
# store away query for calculating next results
# if necessary
self.last_query = query
def _get_next_query(self):
# update our query to get the next set of records
query = self.last_query
# use start page if the query supports it
if query.has_macro('startPage'):
# if the query already defined the startPage
# we just need to increment it
if hasattr(query, 'startPage'):
query.startPage += 1
# to issue the first query startPage might not have
# been specified, so set it to 2
else:
query.startPage = 2
return query
# otherwise the query should support startIndex
elif query.has_macro('startIndex'):
# if startIndex was used before we just add the
# items per page to it to get the next set
if hasattr(query, 'startIndex'):
query.startIndex += self.itemsPerPage
# to issue the first query the startIndex may have
# been left blank in that case we assume it to be
# the item just after the last one on this page
else:
query.startIndex = self.itemsPerPage + 1
return query
# doesn't look like there is another stage to this query
return None
# helper for pulling values out of a dictionary if they're there
# and returning a default value if they're not
def _pick(d,key,default=None):
# get the value out
value = d.get(key)
# if it wasn't there return the default
if value == None:
return default
# if they want an int try to convert to an int
# and return default if it fails
if type(default) == int:
try:
return int(d[key])
except:
return default
# otherwise we're good to return the value
return value
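For reference, the deleted Client/Results pair was driven as the Client docstring above describes: iterating the Results object transparently re-queries the server for further pages via the startPage or startIndex macros. A sketch, with an illustrative description URL (this import only works before this commit):

    from calibre.utils.opensearch import Client

    client = Client('http://example.com/opensearchdescription.xml')
    results = client.search('computer')
    for result in results:
        # next() fetches additional pages behind the scenes
        print result.title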

View File

@ -1,5 +1,15 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2006, Ed Summers <ehs@pobox.com>'
__docformat__ = 'restructuredtext en'
class URL(object):
'''
Class for representing a URL in an opensearch v1.1 query
'''
def __init__(self, type='', template='', method='GET'):
self.type = type