[merge] from trunk

This commit is contained in:
Kolenka 2011-10-11 10:11:22 -07:00
commit 53bec8004f
18 changed files with 275 additions and 61 deletions

View File

@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.defensenews.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class DefenseNews(BasicNewsRecipe):
    '''
    Recipe for www.defensenews.com, built from the site's regional and
    topical RSS feeds.
    '''
    title = 'Defense News'
    __author__ = 'Darko Miletic'
    description = 'Find late-breaking defense news from the leading defense news weekly'
    publisher = 'Gannett Government Media Corporation'
    category = 'defense news, defence news, defense, defence, defence budget, defence policy'
    oldest_article = 31
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.defensenews.com/images/logo_defensenews2.jpg'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        .info{font-size: small; color: gray}
    """

    # Metadata handed to the conversion pipeline; mirrors the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [
        dict(name=['meta', 'link']),
        dict(attrs={'class': ['toolbar', 'related', 'left', 'right']}),
    ]
    # These were previously written as ``remove_tags_before = attrs={...}``,
    # a chained assignment that also created a stray ``attrs`` class
    # attribute. Use the explicit tag-specification form instead.
    remove_tags_before = dict(attrs={'class': 'storyWrp'})
    remove_tags_after = dict(attrs={'class': 'middle'})
    remove_attributes = ['lang']

    feeds = [
        (u'Europe', u'http://www.defensenews.com/rss/eur/'),
        (u'Americas', u'http://www.defensenews.com/rss/ame/'),
        (u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asi/'),
        (u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/'),
        (u'Air', u'http://www.defensenews.com/rss/air/'),
        (u'Land', u'http://www.defensenews.com/rss/lan/'),
        (u'Naval', u'http://www.defensenews.com/rss/sea/'),
    ]

    def preprocess_html(self, soup):
        '''
        Strip inline styles and make sure every image has an ``alt`` attribute.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        '''
        # Drop inline styling from every tag that carries a style attribute.
        for item in soup.findAll(style=True):
            del item['style']
        # Give images without alternative text a placeholder value.
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

View File

@ -119,10 +119,8 @@ class Guardian(BasicNewsRecipe):
}
def parse_index(self):
try:
feeds = []
for title, href in self.find_sections():
feeds.append((title, list(self.find_articles(href))))
return feeds
except:
raise NotImplementedError

View File

@ -0,0 +1,27 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MercoPress(BasicNewsRecipe):
    '''
    Recipe for MercoPress (en.mercopress.com), built from its per-region
    RSS feeds.
    '''
    title = u'Merco Press'
    description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America."
    cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif'

    __author__ = 'Russell Phillips'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
    # Every anchor tag is removed from the downloaded articles.
    remove_tags = [dict(name='a')]

    # Each entry must be a (title, url) pair. The Uruguay entry used to be a
    # single mangled string ('Uruguay://en.mercopress.com/rss/uruguay'),
    # which is neither a pair nor a valid URL.
    feeds = [
        ('Antarctica', 'http://en.mercopress.com/rss/antarctica'),
        ('Argentina', 'http://en.mercopress.com/rss/argentina'),
        ('Brazil', 'http://en.mercopress.com/rss/brazil'),
        ('Falkland Islands', 'http://en.mercopress.com/rss/falkland-islands'),
        ('International News', 'http://en.mercopress.com/rss/international'),
        ('Latin America', 'http://en.mercopress.com/rss/latin-america'),
        ('Mercosur', 'http://en.mercopress.com/rss/mercosur'),
        ('Paraguay', 'http://en.mercopress.com/rss/paraguay'),
        ('United States', 'http://en.mercopress.com/rss/united-states'),
        ('Uruguay', 'http://en.mercopress.com/rss/uruguay'),
    ]

View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
# NOTE(review): the class name looks copied from the MercoPress recipe; it is
# kept unchanged here so nothing that refers to it by name breaks.
class MercoPress(BasicNewsRecipe):
    '''
    Recipe for Penguin News (www.penguin-news.com), built from the site's
    single RSS feed.
    '''
    title = u'Penguin News'
    description = u"Penguin News: the Falkland Islands' only newspaper."
    cover_url = 'http://www.penguin-news.com/templates/rt_syndicate_j15/images/logo/light/logo1.png'

    language = 'en'
    __author__ = 'Russell Phillips'

    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'

    # The query string previously contained the HTML entity '&amp;'. Inside a
    # Python string that is sent literally, so the server received a parameter
    # named 'amp;type' instead of 'type'.
    feeds = [
        (u'Penguin News - Falkland Islands',
         u'http://www.penguin-news.com/index.php?format=feed&type=rss'),
    ]

17
recipes/wow.recipe Normal file
View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class WoW(BasicNewsRecipe):
    # Recipe for the WoW Insider blog, fetched from a single RSS feed.

    # Identification
    __author__ = 'Krittika Goyal'
    title = u'WoW Insider'
    language = 'en'

    # Download limits
    max_articles_per_feed = 25
    oldest_article = 1  # days

    # Content handling
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False

    # (feed title, feed URL) pairs
    feeds = [('WoW', 'http://wow.joystiq.com/rss.xml')]

View File

@ -224,6 +224,9 @@ try:
except:
try:
HOST=get_ip_address('wlan0')
except:
try:
HOST=get_ip_address('ppp0')
except:
HOST='192.168.1.2'

View File

@ -20,17 +20,23 @@ for x in [
EXCLUDES.extend(['--exclude', x])
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
def get_rsync_pw(path='/home/kovid/work/kde/conf/buildbot'):
    '''
    Return the rsync password stored in a credentials file.

    The file is expected to contain text of the form ``<name>:<password>``;
    everything after the first ``:`` is returned with surrounding
    whitespace removed. If the file contains no ``:``, an empty string is
    returned.

    :param path: location of the credentials file. Defaults to the path
                 that was previously hard-coded, so existing callers are
                 unaffected.
    :return: the password string.
    :raises IOError: if the file cannot be opened or read.
    '''
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(path) as f:
        contents = f.read()
    return contents.partition(':')[-1].strip()
class Rsync(Command):
description = 'Sync source tree from development machine'
SYNC_CMD = ' '.join(BASE_RSYNC+SAFE_EXCLUDES+
['rsync://{host}/work/{project}', '..'])
['rsync://buildbot@{host}/work/{project}', '..'])
def run(self, opts):
cmd = self.SYNC_CMD.format(host=HOST, project=PROJECT)
env = dict(os.environ)
env['RSYNC_PASSWORD'] = get_rsync_pw()
self.info(cmd)
subprocess.check_call(cmd, shell=True)
subprocess.check_call(cmd, shell=True, env=env)
class Push(Command):
@ -81,7 +87,8 @@ class VMInstaller(Command):
def get_build_script(self):
ans = '\n'.join(self.BUILD_PREFIX)+'\n\n'
rs = ['export RSYNC_PASSWORD=%s'%get_rsync_pw()]
ans = '\n'.join(self.BUILD_PREFIX + rs)+'\n\n'
ans += ' && \\\n'.join(self.BUILD_RSYNC) + ' && \\\n'
ans += ' && \\\n'.join(self.BUILD_CLEAN) + ' && \\\n'
ans += ' && \\\n'.join(self.BUILD_BUILD) + ' && \\\n'

View File

@ -1144,6 +1144,16 @@ class StoreAmazonDEKindleStore(StoreBase):
formats = ['KINDLE']
affiliate = True
class StoreAmazonFRKindleStore(StoreBase):
    # Registration entry for the Amazon FR Kindle store; the implementation
    # is the class named by ``actual_plugin``.
    actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore'

    name = 'Amazon FR Kindle'
    description = u'Tous les ebooks Kindle'
    author = 'Charles Haley'

    # NOTE(review): 'DE' is the same value the Amazon DE entry would use;
    # confirm whether 'FR' was intended for this store.
    headquarters = 'DE'
    affiliate = True
    formats = ['KINDLE']
class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle'
author = 'Charles Haley'
@ -1521,6 +1531,7 @@ plugins += [
StoreArchiveOrgStore,
StoreAmazonKindleStore,
StoreAmazonDEKindleStore,
StoreAmazonFRKindleStore,
StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore,
StoreBNStore,

View File

@ -518,3 +518,9 @@ class BookList(list):
'''
raise NotImplementedError()
def prepare_addable_books(self, paths):
    '''
    Map a list of book paths to paths of addable versions of those books.

    :param paths: list of paths.
    :return: list of paths pointing to addable versions of the books. This
             implementation hands back ``paths`` itself, unchanged.
    '''
    addable = paths
    return addable

View File

@ -47,6 +47,9 @@ class PRST1(USBMS):
WINDOWS_MAIN_MEM = re.compile(
r'(PRS-T1&)'
)
WINDOWS_CARD_A_MEM = re.compile(
r'(PRS-T1__SD&)'
)
MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'
@ -253,8 +256,11 @@ class PRST1(USBMS):
# Get Metadata We Want
lpath = book.lpath
try:
author = newmi.authors[0]
title = newmi.title
except:
author = _('Unknown')
title = newmi.title or _('Unknown')
if lpath not in db_books:
query = '''

View File

@ -397,6 +397,7 @@ class AddAction(InterfaceAction):
d = error_dialog(self.gui, _('Add to library'), _('No book files found'))
d.exec_()
return
paths = self.gui.device_manager.device.prepare_addable_books(paths)
from calibre.gui2.add import Adder
self.__adder_func = partial(self._add_from_device_adder, on_card=None,
model=view.model())

View File

@ -206,7 +206,7 @@
<item>
<widget class="QCheckBox" name="opt_autolaunch_server">
<property name="text">
<string>Run server &amp;automatically on startup</string>
<string>Run server &amp;automatically when calibre starts</string>
</property>
</widget>
</item>

View File

@ -37,6 +37,7 @@ class SearchRestrictionMixin(object):
search = unicode(search)
if not search:
self.search_restriction.setCurrentIndex(0)
self._apply_search_restriction('')
else:
s = '*' + search
if self.search_restriction.count() > 1:

View File

@ -6,7 +6,6 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
@ -37,27 +36,16 @@ class AmazonDEKindleStore(StorePlugin):
def search(self, query, max_results=10, timeout=60):
search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + urllib.quote_plus(query)
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Amazon has two results pages.
# 20110725: seems that is_shot is gone.
# is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
# # Horizontal grid of books.
# if is_shot:
# data_xpath = '//div[contains(@class, "result")]'
# format_xpath = './/div[@class="productTitle"]/text()'
# cover_xpath = './/div[@class="productTitle"]//img/@src'
# # Vertical list of books.
# else:
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
# end is_shot else
for data in doc.xpath(data_xpath):
if counter <= 0:
@ -80,11 +68,9 @@ class AmazonDEKindleStore(StorePlugin):
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
# if is_shot:
# author = format.split(' von ')[-1]
# else:
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
author = author.split('von ')[-1]
if author.startswith('von '):
author = author[4:]
counter -= 1

View File

@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonFRKindleStore(StorePlugin):
    '''
    Store plugin for the Kindle section of amazon.fr.

    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store front, or the page of a single book, via ``open_url``.

        :param detail_item: ASIN of a book; when given, the book's page is
                            opened instead of the store front.
        '''
        aff_id = {'tag': 'charhale-21'}
        if detail_item:
            aff_id['asin'] = detail_item
            store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
        else:
            store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search amazon.fr for Kindle books and yield ``SearchResult`` objects.

        :param query: search text.
        :param max_results: maximum number of results to yield.
        :param timeout: timeout passed to the browser when fetching the page.
        '''
        search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
        # Escape the query by hand: literal '%' first, then turn the '\x..'
        # escapes produced by backslashreplace into '%..', then spaces to '+'.
        escaped = query.encode('ascii', 'backslashreplace')
        escaped = escaped.replace('%', '%25')
        escaped = escaped.replace('\\x', '%')
        escaped = escaped.replace(' ', '+')
        url = search_url + escaped
        br = browser()

        remaining = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            raw = f.read()
            doc = html.fromstring(raw.decode('latin-1', 'replace'))

            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'

            for data in doc.xpath(data_xpath):
                if remaining <= 0:
                    break

                # Even though we are searching digital-text only Amazon will
                # still put in results for non Kindle books (author pages).
                # So we need to explicitly check if the item is a Kindle book
                # and ignore it if it isn't.
                fmt = ''.join(data.xpath(format_xpath))
                if 'kindle' not in fmt.lower():
                    continue

                # We must have an asin otherwise we can't easily reference
                # the book later.
                asin = ''.join(data.xpath("@name"))

                cover_url = ''.join(data.xpath(cover_xpath))

                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                # Drop the leading French "de " prefix from the author text.
                if author.startswith('de '):
                    author = author[3:]

                remaining -= 1

                result = SearchResult()
                result.detail_item = asin.strip()
                result.title = title.strip()
                result.author = author.strip()
                result.price = price.strip()
                result.cover_url = cover_url.strip()
                result.formats = 'Kindle'
                result.drm = SearchResult.DRM_UNKNOWN

                yield result

View File

@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
import random
import re
import urllib
from contextlib import closing
from lxml import html
@ -122,12 +121,12 @@ class AmazonKindleStore(StorePlugin):
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
url = self.search_url + urllib.quote_plus(query)
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Amazon has two results pages.
is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')

View File

@ -6,7 +6,6 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
@ -34,27 +33,16 @@ class AmazonUKKindleStore(StorePlugin):
def search(self, query, max_results=10, timeout=60):
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + urllib.quote_plus(query)
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Amazon has two results pages.
# 20110725: seems that is_shot is gone.
# is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
# # Horizontal grid of books.
# if is_shot:
# data_xpath = '//div[contains(@class, "result")]'
# format_xpath = './/div[@class="productTitle"]/text()'
# cover_xpath = './/div[@class="productTitle"]//img/@src'
# # Vertical list of books.
# else:
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
# end is_shot else
for data in doc.xpath(data_xpath):
if counter <= 0:
@ -77,11 +65,9 @@ class AmazonUKKindleStore(StorePlugin):
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
# if is_shot:
# author = format.split(' von ')[-1]
# else:
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
author = author.split('by ')[-1]
if author.startswith('by '):
author = author[3:]
counter -= 1

View File

@ -47,6 +47,9 @@ def get_parser(usage):
def get_db(dbpath, options):
    '''
    Open the library database.

    :param dbpath: fallback library path, used when ``options.library_path``
                   is None.
    :param options: object with a ``library_path`` attribute; a non-None
                    value takes precedence over ``dbpath``.
    :return: a ``LibraryDatabase2`` opened on the absolute form of the path.
    :raises ValueError: if neither source supplies a path.
    '''
    override = options.library_path
    location = dbpath if override is None else override
    if location is None:
        raise ValueError('No saved library path, either run the GUI or use the'
                ' --with-library option')
    return LibraryDatabase2(os.path.abspath(location))