mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Pull from trunk
This commit is contained in:
commit
fe918ab068
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.5.1'
|
||||
__version__ = '0.5.2'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
'''
|
||||
Various run time constants.
|
||||
|
@ -108,7 +108,7 @@ def find_html_index(files):
|
||||
html_files = [f[0] for f in html_files]
|
||||
for q in ('toc', 'index'):
|
||||
for f in html_files:
|
||||
if os.path.splitext(f)[0].lower() == q:
|
||||
if os.path.splitext(os.path.basename(f))[0].lower() == q:
|
||||
return f, os.path.splitext(f)[1].lower()[1:]
|
||||
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
|
||||
|
||||
@ -199,6 +199,10 @@ class HTMLProcessor(Processor, Rationalizer):
|
||||
|
||||
for tag in self.root.xpath('//form'):
|
||||
tag.getparent().remove(tag)
|
||||
|
||||
for tag in self.root.xpath('//center'):
|
||||
tag.tag = 'div'
|
||||
tag.set('style', 'text-align:center')
|
||||
|
||||
if self.opts.linearize_tables:
|
||||
for tag in self.root.xpath('//table | //tr | //th | //td'):
|
||||
|
@ -138,7 +138,9 @@ class Splitter(object):
|
||||
split_point, before = self.find_split_point(root)
|
||||
if split_point is None or self.split_size > 6*self.orig_size:
|
||||
if not self.always_remove:
|
||||
self.log_warn(_('\t\tToo much markup. Re-splitting without structure preservation. This may cause incorrect rendering.'))
|
||||
self.log_warn(_('\t\tToo much markup. Re-splitting without '
|
||||
'structure preservation. This may cause '
|
||||
'incorrect rendering.'))
|
||||
raise SplitError(self.path, root)
|
||||
|
||||
for t in self.do_split(tree, split_point, before):
|
||||
@ -149,7 +151,8 @@ class Splitter(object):
|
||||
if size <= self.opts.profile.flow_size:
|
||||
self.trees.append(t)
|
||||
#print tostring(t.getroot(), pretty_print=True)
|
||||
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', len(self.trees), size/1024.)
|
||||
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
|
||||
len(self.trees), size/1024.)
|
||||
self.split_size += size
|
||||
else:
|
||||
self.split_to_size(t)
|
||||
@ -329,10 +332,12 @@ class Splitter(object):
|
||||
'//pre',
|
||||
'//hr',
|
||||
'//p',
|
||||
'//div',
|
||||
'//br',
|
||||
'//li',
|
||||
):
|
||||
elems = root.xpath(path, namespaces={'re':'http://exslt.org/regular-expressions'})
|
||||
elems = root.xpath(path,
|
||||
namespaces={'re':'http://exslt.org/regular-expressions'})
|
||||
elem = pick_elem(elems)
|
||||
if elem is not None:
|
||||
try:
|
||||
|
@ -267,7 +267,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
|
||||
except IgnoreFile, err:
|
||||
rejects.append(link)
|
||||
if not err.doesnt_exist or verbose > 1:
|
||||
print str(err)
|
||||
print repr(err)
|
||||
for link in rejects:
|
||||
hf.links.remove(link)
|
||||
|
||||
|
@ -229,7 +229,8 @@ class MetaInformation(object):
|
||||
if mi.authors and mi.authors[0] != _('Unknown'):
|
||||
self.authors = mi.authors
|
||||
|
||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||
|
||||
for attr in ('author_sort', 'title_sort', 'category',
|
||||
'publisher', 'series', 'series_index', 'rating',
|
||||
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
||||
'cover', 'language', 'guide', 'book_producer',
|
||||
@ -244,7 +245,16 @@ class MetaInformation(object):
|
||||
|
||||
if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
|
||||
self.cover_data = mi.cover_data
|
||||
|
||||
|
||||
my_comments = getattr(self, 'comments', '')
|
||||
other_comments = getattr(mi, 'comments', '')
|
||||
if not my_comments:
|
||||
my_comments = ''
|
||||
if not other_comments:
|
||||
other_comments = ''
|
||||
if len(other_comments.strip()) > len(my_comments.strip()):
|
||||
self.comments = other_comments
|
||||
|
||||
def format_series_index(self):
|
||||
try:
|
||||
x = float(self.series_index)
|
||||
|
153
src/calibre/ebooks/metadata/fetch.py
Normal file
153
src/calibre/ebooks/metadata/fetch.py
Normal file
@ -0,0 +1,153 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import traceback, sys, textwrap
|
||||
from threading import Thread
|
||||
|
||||
from calibre import preferred_encoding
|
||||
from calibre.utils.config import OptionParser
|
||||
|
||||
class FetchGoogle(Thread):
    """Background thread that looks up book metadata on Google Books.

    Once the thread has finished, ``results`` holds whatever
    ``calibre.ebooks.metadata.google_books.search`` returned (an empty list
    if the lookup failed), while ``exception`` and ``tb`` hold the error and
    its formatted traceback, or ``None`` if nothing went wrong.
    """

    name = 'Google Books'

    def __init__(self, title, author, publisher, isbn, verbose):
        Thread.__init__(self, None)
        self.daemon = True
        self.title = title
        self.author = author
        self.publisher = publisher
        self.isbn = isbn
        self.verbose = verbose
        # Give results a value up front so that code reading it after join()
        # never hits an AttributeError, even if run() dies before assigning.
        self.results = []
        self.exception, self.tb = None, None

    def run(self):
        try:
            # Imported inside the try block so that an import failure is
            # recorded in exception/tb like any other lookup error.
            from calibre.ebooks.metadata.google_books import search
            self.results = search(self.title, self.author, self.publisher,
                                  self.isbn, max_results=10,
                                  verbose=self.verbose)
        except Exception as e:
            self.results = []
            self.exception = e
            self.tb = traceback.format_exc()
|
||||
|
||||
|
||||
class FetchISBNDB(Thread):
    """Background thread that looks up book metadata on isbndb.com.

    The query is expressed as a command line for the isbndb module's option
    parser: the ISBN alone if one was given, otherwise whichever of title,
    author and publisher are set, followed by the access *key*.

    Once the thread has finished, ``results`` holds the list returned by
    ``create_books`` (an empty list if the lookup failed), while
    ``exception`` and ``tb`` hold the error and its formatted traceback, or
    ``None`` if nothing went wrong.
    """

    name = 'IsbnDB'

    def __init__(self, title, author, publisher, isbn, verbose, key):
        Thread.__init__(self, None)
        self.daemon = True
        self.title = title
        self.author = author
        self.publisher = publisher
        self.isbn = isbn
        self.verbose = verbose
        self.key = key
        # Give results a value up front so that code reading it after join()
        # never hits an AttributeError, even if run() dies before assigning
        # (for instance when the option parser exits via SystemExit).
        self.results = []
        self.exception, self.tb = None, None

    def run(self):
        args = ['isbndb']
        if self.isbn:
            args.extend(['--isbn', self.isbn])
        else:
            if self.title:
                args.extend(['--title', self.title])
            if self.author:
                args.extend(['--author', self.author])
            if self.publisher:
                args.extend(['--publisher', self.publisher])
        args.append(self.key)
        try:
            # Imported inside the try block so that an import failure is
            # recorded in exception/tb like any other lookup error.
            from calibre.ebooks.metadata.isbndb import option_parser, create_books
            opts, args = option_parser().parse_args(args)
            self.results = create_books(opts, args)
        except Exception as e:
            self.results = []
            self.exception = e
            self.tb = traceback.format_exc()
|
||||
|
||||
def result_index(source, result):
    """Return the position in *source* of the first item whose ``isbn``
    equals that of *result*.

    Returns -1 if *result* has no ISBN or if no item in *source* matches.
    """
    wanted = result.isbn
    if wanted:
        for position, candidate in enumerate(source):
            if candidate.isbn == wanted:
                return position
    return -1
|
||||
|
||||
def merge_results(one, two):
    """Fold the results in *two* into the list *one*, modifying it in place.

    A result whose ISBN matches an item already in *one* is merged into that
    item via ``smart_update``; any other result is appended to *one*.
    """
    for incoming in two:
        match = result_index(one, incoming)
        if match >= 0:
            one[match].smart_update(incoming)
            continue
        one.append(incoming)
|
||||
|
||||
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
           verbose=0):
    """Look up book metadata from all available sources in parallel.

    Google Books is always queried; isbndb.com is queried as well when
    *isbndb_key* is given. Results from the additional sources are merged
    into the Google Books results.

    At least one of *title*, *author*, *publisher* and *isbn* must not be
    None (enforced with an assertion).

    Returns a tuple ``(results, exceptions)``. ``results`` is sorted so that
    the entries with the longest comments come first. ``exceptions`` is a
    list of ``(source name, exception, traceback)`` tuples, one per source,
    in which exception and traceback are None for sources that succeeded.
    """
    assert not(title is None and author is None and publisher is None and
               isbn is None)
    fetchers = [FetchGoogle(title, author, publisher, isbn, verbose)]
    if isbndb_key:
        fetchers.append(FetchISBNDB(title, author, publisher, isbn, verbose,
                                    isbndb_key))

    for fetcher in fetchers:
        fetcher.start()
    for fetcher in fetchers:
        fetcher.join()
    for fetcher in fetchers[1:]:
        merge_results(fetchers[0].results, fetcher.results)

    def comment_length(mi):
        # Entries without comments count as having empty comments
        return len(mi.comments.strip()) if mi.comments else 0

    # Rank by the amount of descriptive text rather than by the alphabetical
    # order of that text. The sort is stable, so ties keep their order.
    results = sorted(fetchers[0].results, key=comment_length, reverse=True)

    return results, [(x.name, x.exception, x.tb) for x in fetchers]
|
||||
|
||||
|
||||
def option_parser():
    """Create the command line option parser for the metadata fetcher."""
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from online sources. You must specify at least one
        of title, author, publisher or ISBN. If you specify ISBN, the others
        are ignored.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    # type='int' so the value is a number whether it comes from the default
    # or from the command line (it would otherwise be parsed as a string)
    parser.add_option('-m', '--max-results', default=10, type='int',
                      help='Maximum number of results to fetch')
    parser.add_option('-k', '--isbndb-key',
                      help=('The access key for your ISBNDB.com account. '
                            'Only needed if you want to search isbndb.com'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser
|
||||
|
||||
def main(args=sys.argv):
    """Command line entry point: search for metadata and print what is found.

    Prints every result, followed by a warning for each source that failed.
    Returns 0 on success, or 1 (after printing the usage message) if no
    search criteria were supplied.
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    # search() requires at least one criterion; tell the user how to supply
    # one instead of letting its assertion surface as a traceback.
    if opts.title is None and opts.author is None and \
            opts.publisher is None and opts.isbn is None:
        parser.print_help()
        return 1
    results, exceptions = search(opts.title, opts.author, opts.publisher,
                                 opts.isbn, opts.isbndb_key, opts.verbose)
    for result in results:
        print(unicode(result).encode(preferred_encoding))
        print('')

    for name, exception, tb in exceptions:
        if exception is not None:
            print('WARNING: Fetching from %s failed with error:' % name)
            print(exception)
            print(tb)

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
241
src/calibre/ebooks/metadata/google_books.py
Normal file
241
src/calibre/ebooks/metadata/google_books.py
Normal file
@ -0,0 +1,241 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, textwrap
|
||||
from urllib import urlencode
|
||||
from functools import partial
|
||||
|
||||
from lxml import etree
|
||||
from dateutil import parser
|
||||
|
||||
from calibre import browser, preferred_encoding
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.config import OptionParser
|
||||
|
||||
# Namespace prefixes available to the XPath expressions compiled below
NAMESPACES = {
    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom': 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms',
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)

# Expressions evaluated against a whole feed
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')

# Expressions evaluated against a single entry
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
|
||||
|
||||
def report(verbose):
    """Print the traceback of the exception currently being handled, but
    only if *verbose* is true."""
    if not verbose:
        return
    import traceback
    traceback.print_exc()
|
||||
|
||||
|
||||
class Query(object):
    """A single request to the Google Books volumes feed.

    The constructor builds the request URL from the search criteria and
    stores it in ``url``. Calling the instance fetches that URL and returns
    the matching feed entries.
    """

    BASE_URL = 'http://books.google.com/books/feeds/volumes?'

    def __init__(self, title=None, author=None, publisher=None, isbn=None,
                 max_results=20, min_viewability='none', start_index=1):
        """Build the query URL.

        At least one of *title*, *author*, *publisher* and *isbn* must not be
        None. If *isbn* is given the other criteria are ignored. Otherwise
        every whitespace separated word of each criterion becomes its own
        ``intitle:``/``inauthor:``/``inpublisher:`` term.

        *max_results* must be less than 21 and *min_viewability* one of
        'none', 'partial' or 'full'. Invalid arguments trigger an
        AssertionError.
        """
        assert not(title is None and author is None and publisher is None and
                   isbn is None)
        assert (max_results < 21)
        assert (min_viewability in ('none', 'partial', 'full'))
        if isbn is not None:
            q = 'isbn:'+isbn
        else:
            def build_term(prefix, parts):
                return ' '.join('in'+prefix + ':' + x for x in parts)

            terms = []
            for prefix, value in (('title', title), ('author', author),
                                  ('publisher', publisher)):
                if value is not None:
                    term = build_term(prefix, value.split())
                    if term:
                        terms.append(term)
            # The terms must be separated from each other, otherwise the last
            # word of one criterion runs into the prefix of the next one
            # ("intitle:fooinauthor:bar").
            q = ' '.join(terms)

        if not isinstance(q, str):
            # Python 2 unicode object: urlencode() would try to convert it
            # with str() and fail on non-ASCII characters
            q = q.encode('utf-8')

        self.url = self.BASE_URL+urlencode({
            'q':q,
            'max-results':max_results,
            'start-index':start_index,
            'min-viewability':min_viewability,
        })

    def __call__(self, browser, verbose):
        """Fetch the query URL using *browser* and parse the returned feed.

        Returns ``(entries, new_start)`` where ``entries`` is the list of
        entry elements in the feed and ``new_start`` is the start index of
        the following page, or 0 if that index is beyond the total number of
        results reported by the feed.
        """
        if verbose:
            print('Query: %s' % self.url)
        feed = etree.fromstring(browser.open(self.url).read())
        total = int(total_results(feed)[0].text)
        start = int(start_index(feed)[0].text)
        entries = entry(feed)
        new_start = start + len(entries)
        if new_start > total:
            new_start = 0
        return entries, new_start
|
||||
|
||||
|
||||
class ResultList(list):
    """A list of MetaInformation objects built from feed entries.

    The ``get_*`` helpers each extract one piece of information from an
    entry element; ``populate`` uses them to fill the list.
    """

    def get_description(self, entry, verbose):
        """Return the entry's description with a 'SUMMARY:' header line, or
        None if there is no description or it cannot be read."""
        try:
            desc = description(entry)
            if desc:
                return 'SUMMARY:\n'+desc[0].text
        except Exception:
            report(verbose)

    def get_language(self, entry, verbose):
        """Return the text of the entry's first language element, or None."""
        try:
            l = language(entry)
            if l:
                return l[0].text
        except Exception:
            report(verbose)

    def get_title(self, entry):
        """Return the longest of the entry's titles.

        Raises an exception if the entry has no title.
        """
        candidates = [x.text for x in title(entry)]
        candidates.sort(key=len, reverse=True)
        return candidates[0]

    def get_authors(self, entry):
        """Return the list of the entry's creators (possibly empty)."""
        m = creator(entry)
        if not m:
            m = []
        m = [x.text for x in m]
        return m

    def get_author_sort(self, entry, verbose):
        """Return the value of the first creator attribute whose name ends
        with 'file-as', or None if there is no such attribute."""
        for x in creator(entry):
            for key, val in x.attrib.items():
                if key.endswith('file-as'):
                    return val

    def get_identifiers(self, entry, mi):
        """Set ``mi.isbn`` to the longest ISBN found among the entry's
        identifiers. ``mi`` is left untouched if there is no ISBN."""
        isbns = []
        for x in identifier(entry):
            t = str(x.text).strip()
            if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
                if t[:5].upper() == 'ISBN:':
                    isbns.append(t[5:])
        if isbns:
            mi.isbn = sorted(isbns, key=len)[-1]

    def get_tags(self, entry, verbose):
        """Return the list of the entry's subjects, empty on error."""
        try:
            tags = [x.text for x in subject(entry)]
        except Exception:
            report(verbose)
            tags = []
        return tags

    def get_publisher(self, entry, verbose):
        """Return the entry's first publisher, or None if it has none."""
        try:
            pub = publisher(entry)[0].text
        except Exception:
            pub = None
        return pub

    def get_date(self, entry, verbose):
        """Return the entry's first date parsed with dateutil, or None if it
        is missing or cannot be parsed."""
        try:
            d = date(entry)
            if d:
                d = parser.parse(d[0].text)
            else:
                d = None
        except Exception:
            report(verbose)
            d = None
        return d

    def populate(self, entries, browser, verbose=False):
        """Append a MetaInformation object for each entry in *entries*.

        The URL in each entry's id element is fetched with *browser* to get
        the full record; if that fails, the details are read from the search
        result entry itself. Entries without a readable id or title are
        skipped.
        """
        for x in entries:
            try:
                id_url = entry_id(x)[0].text
                book_title = self.get_title(x)
            except Exception:
                report(verbose)
                # Without an id and a title there is nothing to build on.
                # Carrying on would either raise a NameError or silently
                # reuse the values of the previous entry.
                continue
            mi = MetaInformation(book_title, self.get_authors(x))
            try:
                raw = browser.open(id_url).read()
                feed = etree.fromstring(raw)
                x = entry(feed)[0]
            except Exception as e:
                if verbose:
                    print('Failed to get all details for an entry')
                    print(e)
            mi.author_sort = self.get_author_sort(x, verbose)
            mi.comments = self.get_description(x, verbose)
            self.get_identifiers(x, mi)
            mi.tags = self.get_tags(x, verbose)
            mi.publisher = self.get_publisher(x, verbose)
            mi.timestamp = self.get_date(x, verbose)
            mi.language = self.get_language(x, verbose)
            self.append(mi)
|
||||
|
||||
|
||||
def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=40):
    """Search for books and return a ResultList with their metadata.

    Result pages are requested one after the other until *max_results*
    entries have been collected or there are no more results. At least one
    of *title*, *author*, *publisher* and *isbn* must not be None, otherwise
    an AssertionError is raised.
    """
    # The value may arrive as a string from the command line
    max_results = int(max_results)
    br = browser()
    start, entries = 1, []
    while start > 0 and len(entries) < max_results:
        # start_index has to be passed on, otherwise every iteration fetches
        # the first page again and the entries are duplicated
        new, start = Query(title=title, author=author, publisher=publisher,
                           isbn=isbn, min_viewability=min_viewability,
                           start_index=start)(br, verbose)
        if not new:
            break
        entries.extend(new)

    entries = entries[:max_results]

    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans
|
||||
|
||||
def option_parser():
    """Create the command line option parser for the Google metadata
    fetcher."""
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from Google. You must specify one of title, author,
        publisher or ISBN. If you specify ISBN the others are ignored. Will
        fetch a maximum of 100 matches, so you should make your query as
        specific as possible.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    # type='int' because the value is used as a number by search(); without
    # it a value given on the command line would be parsed as a string
    parser.add_option('-m', '--max-results', default=10, type='int',
                      help='Maximum number of results to fetch')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser
|
||||
|
||||
def main(args=sys.argv):
    """Command line entry point: run a search and print every result.

    Returns 1, after printing the traceback and the usage message, if the
    search fails an assertion. Returns None otherwise.
    """
    cli = option_parser()
    opts, _remaining = cli.parse_args(args)
    try:
        found = search(opts.title, opts.author, opts.publisher, opts.isbn,
                       verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        cli.print_help()
        return 1
    for book in found:
        print(unicode(book).encode(preferred_encoding))
        # Blank line between results
        print('')


if __name__ == '__main__':
    sys.exit(main())
|
@ -4,10 +4,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Interface to isbndb.com. My key HLLXQX2A.
|
||||
'''
|
||||
|
||||
import sys, logging, re, socket
|
||||
import sys, re, socket
|
||||
from urllib import urlopen, quote
|
||||
|
||||
from calibre import setup_cli_handlers
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
@ -63,9 +62,10 @@ class ISBNDBMetadata(MetaInformation):
|
||||
|
||||
try:
|
||||
self.author_sort = book.find('authors').find('person').string
|
||||
if self.authors and self.author_sort == self.authors[0]:
|
||||
self.author_sort = None
|
||||
except:
|
||||
if self.authors:
|
||||
self.author_sort = authors_to_sort_string(self.authors)
|
||||
pass
|
||||
self.publisher = book.find('publishertext').string
|
||||
|
||||
summ = book.find('summary')
|
||||
@ -118,19 +118,15 @@ key is the account key you generate after signing up for a free account from isb
|
||||
return parser
|
||||
|
||||
|
||||
def create_books(opts, args, logger=None, timeout=5.):
|
||||
if logger is None:
|
||||
level = logging.DEBUG if opts.verbose else logging.INFO
|
||||
logger = logging.getLogger('isbndb')
|
||||
setup_cli_handlers(logger, level)
|
||||
|
||||
def create_books(opts, args, timeout=5.):
|
||||
base_url = BASE_URL%dict(key=args[1])
|
||||
if opts.isbn is not None:
|
||||
url = build_isbn(base_url, opts)
|
||||
else:
|
||||
url = build_combined(base_url, opts)
|
||||
|
||||
logger.info('ISBNDB query: '+url)
|
||||
|
||||
if opts.verbose:
|
||||
print ('ISBNDB query: '+url)
|
||||
|
||||
return [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
|
||||
|
||||
@ -139,7 +135,7 @@ def main(args=sys.argv):
|
||||
opts, args = parser.parse_args(args)
|
||||
if len(args) != 2:
|
||||
parser.print_help()
|
||||
print('You must supply the isbndb.com key')
|
||||
print ('You must supply the isbndb.com key')
|
||||
return 1
|
||||
|
||||
for book in create_books(opts, args):
|
||||
|
@ -4,22 +4,74 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
GUI for fetching metadata from servers.
|
||||
'''
|
||||
|
||||
import logging, cStringIO
|
||||
import time
|
||||
|
||||
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, \
|
||||
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, QThread, \
|
||||
QAbstractTableModel, QCoreApplication, QTimer
|
||||
from PyQt4.QtGui import QDialog, QItemSelectionModel
|
||||
from PyQt4.QtGui import QDialog, QItemSelectionModel, QWidget, QLabel, QMovie
|
||||
|
||||
from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
|
||||
from calibre.gui2 import error_dialog, NONE, info_dialog
|
||||
from calibre.ebooks.metadata.isbndb import create_books, option_parser, ISBNDBError
|
||||
from calibre.gui2 import error_dialog, NONE, info_dialog, warning_dialog
|
||||
from calibre.utils.config import prefs
|
||||
|
||||
class Fetcher(QThread):
    """Thread that runs the metadata search so that the GUI stays responsive.

    When the thread has finished, ``results`` and ``exceptions`` hold the two
    values returned by ``calibre.ebooks.metadata.fetch.search``. Both are
    empty lists until then, and stay empty if the search itself fails.
    """

    def __init__(self, title, author, publisher, isbn, key):
        QThread.__init__(self)
        self.title = title
        self.author = author
        self.publisher = publisher
        self.isbn = isbn
        self.key = key
        # Defined up front so that code inspecting the thread after it has
        # stopped never hits an AttributeError if run() raised
        self.results = []
        self.exceptions = []

    def run(self):
        from calibre.ebooks.metadata.fetch import search
        self.results, self.exceptions = search(self.title, self.author,
                                               self.publisher, self.isbn,
                                               self.key if self.key else None)
|
||||
|
||||
class ProgressIndicator(QWidget):
    """Overlay widget showing an animation with a status message below it.

    The widget is created hidden. ``start`` sizes it relative to its parent,
    shows it and runs the animation; ``stop`` pauses the animation and hides
    the widget again.
    """

    def __init__(self, *args):
        QWidget.__init__(self, *args)
        self.setGeometry(0, 0, 300, 350)
        self.movie = QMovie(':/images/jobs-animated.mng')
        self.ml = QLabel(self)
        self.ml.setMovie(self.movie)
        self.movie.start()
        self.movie.setPaused(True)
        self.status = QLabel(self)
        self.status.setWordWrap(True)
        self.status.setAlignment(Qt.AlignHCenter|Qt.AlignTop)
        # font() hands back a copy, so the modified font has to be set on the
        # label again for the changes to take effect
        font = self.status.font()
        font.setBold(True)
        font.setPointSize(self.font().pointSize()+6)
        self.status.setFont(font)
        self.setVisible(False)

    def start(self, msg=''):
        """Show the indicator over its parent with *msg* as status text."""
        view = self.parent()
        pwidth, pheight = view.size().width(), view.size().height()
        self.resize(pwidth, min(pheight, 250))
        # Centre vertically; converted to int as for the label below
        self.move(0, int((pheight-self.size().height())/2.))
        self.ml.resize(self.ml.sizeHint())
        self.ml.move(int((self.size().width()-self.ml.size().width())/2.), 0)
        self.status.resize(self.size().width(), self.size().height()-self.ml.size().height()-10)
        self.status.move(0, self.ml.size().height()+10)
        self.status.setText(msg)
        self.setVisible(True)
        self.movie.setPaused(False)

    def stop(self):
        """Pause the animation if it is running and hide the indicator."""
        if self.movie.state() == self.movie.Running:
            self.movie.setPaused(True)
        self.setVisible(False)
|
||||
|
||||
class Matches(QAbstractTableModel):
|
||||
|
||||
def __init__(self, matches):
|
||||
self.matches = matches
|
||||
self.matches.sort(cmp=lambda b, a: cmp(len(a.comments if a.comments else ''), len(b.comments if b.comments else '')))
|
||||
self.matches.sort(cmp=lambda b, a: \
|
||||
cmp(len(a.comments if a.comments else ''),
|
||||
len(b.comments if b.comments else '')))
|
||||
QAbstractTableModel.__init__(self)
|
||||
|
||||
def rowCount(self, *args):
|
||||
@ -73,22 +125,23 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
|
||||
Ui_FetchMetadata.__init__(self)
|
||||
self.setupUi(self)
|
||||
|
||||
self.pi = ProgressIndicator(self)
|
||||
self.timeout = timeout
|
||||
QObject.connect(self.fetch, SIGNAL('clicked()'), self.fetch_metadata)
|
||||
|
||||
self.key.setText(prefs['isbndb_com_key'])
|
||||
|
||||
self.setWindowTitle(title if title else 'Unknown')
|
||||
self.tlabel.setText(self.tlabel.text().arg(title if title else 'Unknown'))
|
||||
self.setWindowTitle(title if title else _('Unknown'))
|
||||
self.isbn = isbn
|
||||
self.title = title
|
||||
self.author = author.strip()
|
||||
self.publisher = publisher
|
||||
self.previous_row = None
|
||||
self.connect(self.matches, SIGNAL('activated(QModelIndex)'), self.chosen)
|
||||
key = str(self.key.text())
|
||||
if key:
|
||||
QTimer.singleShot(100, self.fetch_metadata)
|
||||
self.connect(self.matches, SIGNAL('entered(QModelIndex)'),
|
||||
lambda index:self.matches.setCurrentIndex(index))
|
||||
self.matches.setMouseTracking(True)
|
||||
self.fetch_metadata()
|
||||
|
||||
|
||||
def show_summary(self, current, previous):
|
||||
@ -100,53 +153,58 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
|
||||
|
||||
def fetch_metadata(self):
|
||||
key = str(self.key.text())
|
||||
if not key:
|
||||
error_dialog(self, _('Cannot connect'),
|
||||
_('You must specify a valid access key for isbndb.com'))
|
||||
return
|
||||
else:
|
||||
if key:
|
||||
prefs['isbndb_com_key'] = key
|
||||
|
||||
args = ['isbndb']
|
||||
else:
|
||||
key = None
|
||||
title = author = publisher = isbn = None
|
||||
if self.isbn:
|
||||
args.extend(('--isbn', self.isbn))
|
||||
isbn = self.isbn
|
||||
if self.title:
|
||||
args.extend(('--title', self.title))
|
||||
if self.author and not self.author == 'Unknown':
|
||||
args.extend(('--author', self.author))
|
||||
#if self.publisher:
|
||||
# args.extend(('--publisher', self.publisher))
|
||||
|
||||
title = self.title
|
||||
if self.author and not self.author == _('Unknown'):
|
||||
author = self.author
|
||||
self.fetch.setEnabled(False)
|
||||
self.setCursor(Qt.WaitCursor)
|
||||
QCoreApplication.instance().processEvents()
|
||||
self.fetcher = Fetcher(title, author, publisher, isbn, key)
|
||||
self.fetcher.start()
|
||||
self.pi.start(_('Finding metadata...'))
|
||||
self._hangcheck = QTimer(self)
|
||||
self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
|
||||
self.start_time = time.time()
|
||||
self._hangcheck.start()
|
||||
|
||||
def hangcheck(self):
|
||||
if not (self.fetcher.isFinished() or time.time() - self.start_time > 75):
|
||||
return
|
||||
self._hangcheck.stop()
|
||||
try:
|
||||
args.append(key)
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(args)
|
||||
|
||||
self.logger = logging.getLogger('Job #'+str(id))
|
||||
self.logger.setLevel(logging.DEBUG)
|
||||
self.log_dest = cStringIO.StringIO()
|
||||
handler = logging.StreamHandler(self.log_dest)
|
||||
handler.setLevel(logging.DEBUG)
|
||||
handler.setFormatter(logging.Formatter('[%(levelname)s] %(filename)s:%(lineno)s: %(message)s'))
|
||||
self.logger.addHandler(handler)
|
||||
|
||||
try:
|
||||
books = create_books(opts, args, self.logger, self.timeout)
|
||||
except ISBNDBError, err:
|
||||
error_dialog(self, _('Error fetching metadata'), str(err)).exec_()
|
||||
if self.fetcher.isRunning():
|
||||
error_dialog(self, _('Could not find metadata'),
|
||||
_('The metadata download seems to have stalled. '
|
||||
'Try again later.')).exec_()
|
||||
self.fetcher.terminate()
|
||||
return
|
||||
self.model = Matches(self.fetcher.results)
|
||||
warnings = [(x[0], unicode(x[1])) for x in \
|
||||
self.fetcher.exceptions if x[1] is not None]
|
||||
if warnings:
|
||||
warnings='<br>'.join(['<b>%s</b>: %s'%(name, exc) for name,exc in warnings])
|
||||
warning_dialog(self, _('Warning'),
|
||||
'<p>'+_('Could not fetch metadata from:')+\
|
||||
'<br><br>'+warnings+'</p>').exec_()
|
||||
if self.model.rowCount() < 1:
|
||||
info_dialog(self, _('No metadata found'),
|
||||
_('No metadata found, try adjusting the title and author '
|
||||
'or the ISBN key.')).exec_()
|
||||
self.reject()
|
||||
return
|
||||
|
||||
self.model = Matches(books)
|
||||
if self.model.rowCount() < 1:
|
||||
info_dialog(self, _('No metadata found'), _('No metadata found, try adjusting the title and author or the ISBN key.')).exec_()
|
||||
self.reject()
|
||||
|
||||
self.matches.setModel(self.model)
|
||||
QObject.connect(self.matches.selectionModel(), SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'),
|
||||
self.show_summary)
|
||||
QObject.connect(self.matches.selectionModel(),
|
||||
SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'),
|
||||
self.show_summary)
|
||||
self.model.reset()
|
||||
self.matches.selectionModel().select(self.model.index(0, 0),
|
||||
QItemSelectionModel.Select | QItemSelectionModel.Rows)
|
||||
@ -155,9 +213,9 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
|
||||
self.fetch.setEnabled(True)
|
||||
self.unsetCursor()
|
||||
self.matches.resizeColumnsToContents()
|
||||
self.pi.stop()
|
||||
|
||||
|
||||
|
||||
|
||||
def selected_book(self):
|
||||
try:
|
||||
return self.matches.model().matches[self.matches.currentIndex().row()]
|
||||
|
@ -23,20 +23,13 @@
|
||||
<item>
|
||||
<widget class="QLabel" name="tlabel" >
|
||||
<property name="text" >
|
||||
<string>Fetching metadata for <b>%1</b></string>
|
||||
<string><p>calibre can find metadata for your books from two locations: <b>Google Books</b> and <b>isbndb.com</b>. <p>To use isbndb.com you must sign up for a <a href="http://www.isbndb.com">free account</a> and exter you access key below.</string>
|
||||
</property>
|
||||
<property name="alignment" >
|
||||
<set>Qt::AlignCenter</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLabel" name="label" >
|
||||
<property name="text" >
|
||||
<string>Sign up for a free account from <a href="http://www.isbndb.com">ISBNdb.com</a> to get an access key.</string>
|
||||
</property>
|
||||
<property name="alignment" >
|
||||
<set>Qt::AlignCenter</set>
|
||||
<property name="wordWrap" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="openExternalLinks" >
|
||||
<bool>true</bool>
|
||||
|
@ -219,6 +219,8 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
|
||||
return QVariant(category + ' [%d]'%num)
|
||||
elif role == Qt.FontRole:
|
||||
return self.bold_font
|
||||
elif role == Qt.ForegroundRole and category == _('Scheduled'):
|
||||
return QVariant(QColor(0, 255, 0))
|
||||
return NONE
|
||||
|
||||
def update_recipe_schedule(self, recipe):
|
||||
|
BIN
src/calibre/gui2/images/news/24sata.png
Normal file
BIN
src/calibre/gui2/images/news/24sata.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 671 B |
BIN
src/calibre/gui2/images/news/adventuregamers.png
Normal file
BIN
src/calibre/gui2/images/news/adventuregamers.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 686 B |
BIN
src/calibre/gui2/images/news/instapaper.png
Normal file
BIN
src/calibre/gui2/images/news/instapaper.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.7 KiB |
BIN
src/calibre/gui2/images/news/nacional_cro.png
Normal file
BIN
src/calibre/gui2/images/news/nacional_cro.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 315 B |
BIN
src/calibre/gui2/images/news/vecernji_list.png
Normal file
BIN
src/calibre/gui2/images/news/vecernji_list.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 514 B |
@ -94,7 +94,8 @@ class DateDelegate(QStyledItemDelegate):
|
||||
def createEditor(self, parent, option, index):
|
||||
qde = QStyledItemDelegate.createEditor(self, parent, option, index)
|
||||
qde.setDisplayFormat('MM/dd/yyyy')
|
||||
qde.setMinimumDate(QDate(100,1,1))
|
||||
qde.setMinimumDate(QDate(-4000,1,1))
|
||||
qde.setCalendarPopup(True)
|
||||
return qde
|
||||
|
||||
class BooksModel(QAbstractTableModel):
|
||||
@ -824,7 +825,7 @@ class DeviceBooksModel(BooksModel):
|
||||
|
||||
|
||||
def search(self, text, refinement, reset=True):
|
||||
if not text:
|
||||
if not text or not text.strip():
|
||||
self.map = list(range(len(self.db)))
|
||||
else:
|
||||
matches = self.search_engine.parse(text)
|
||||
|
@ -94,6 +94,7 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
self.viewers = collections.deque()
|
||||
self.content_server = None
|
||||
self.system_tray_icon = QSystemTrayIcon(QIcon(':/library'), self)
|
||||
self.system_tray_icon.setObjectName('calibre')
|
||||
if not config['systray_icon']:
|
||||
self.system_tray_icon.hide()
|
||||
else:
|
||||
|
@ -92,7 +92,6 @@ class ProgressIndicator(QWidget):
|
||||
|
||||
def stop(self):
|
||||
if self.movie.state() == self.movie.Running:
|
||||
#self.movie.jumpToFrame(0)
|
||||
self.movie.setPaused(True)
|
||||
self.setVisible(False)
|
||||
|
||||
|
@ -40,6 +40,7 @@ entry_points = {
|
||||
'calibre-parallel = calibre.parallel:main',
|
||||
'calibre-customize = calibre.customize.ui:main',
|
||||
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
|
||||
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
|
||||
],
|
||||
'gui_scripts' : [
|
||||
__appname__+' = calibre.gui2.main:main',
|
||||
@ -157,6 +158,7 @@ def setup_completion(fatal_errors):
|
||||
from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
|
||||
from calibre.ebooks.mobi.from_feeds import option_parser as feeds2mobi
|
||||
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
|
||||
from calibre.ebooks.metadata.fetch import option_parser as fem_op
|
||||
from calibre.gui2.main import option_parser as guiop
|
||||
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
||||
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
|
||||
@ -191,6 +193,7 @@ def setup_completion(fatal_errors):
|
||||
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
||||
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
|
||||
f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
|
||||
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
|
||||
f.write('''
|
||||
_prs500_ls()
|
||||
{
|
||||
|
@ -205,6 +205,9 @@ class Server(object):
|
||||
ax.set_xlabel('Days ago')
|
||||
ax.set_ylabel('Income ($)')
|
||||
ax.hlines([stats.daily_average], 0, days-1)
|
||||
ax.hlines([stats.daily_average+stats.daily_deviation,
|
||||
stats.daily_average-stats.daily_deviation], 0, days-1,
|
||||
linestyle=':',color='r')
|
||||
ax.set_xlim([0, days-1])
|
||||
text = u'''\
|
||||
Total: $%(total).2f
|
||||
|
@ -358,12 +358,9 @@ else:
|
||||
def extract_tarball(tar, destdir):
|
||||
print 'Extracting application files...'
|
||||
if hasattr(tar, 'read'):
|
||||
try:
|
||||
tarfile.open(fileobj=tar, mode='r').extractall(destdir)
|
||||
except: # tarfile.py on Fedora 9 is buggy
|
||||
subprocess.check_call(['tar', 'xjf', tar.name, '-C', destdir])
|
||||
subprocess.check_call(['tar', 'xjf', tar.name, '-C', destdir])
|
||||
else:
|
||||
tarfile.open(tar, 'r').extractall(destdir)
|
||||
subprocess.check_call(['tar', 'xjf', tar, '-C', destdir])
|
||||
|
||||
def main():
|
||||
defdir = '/opt/calibre'
|
||||
@ -382,5 +379,4 @@ else:
|
||||
pi = os.path.join(destdir, 'calibre_postinstall')
|
||||
subprocess.call(pi, shell=True)
|
||||
return 0
|
||||
''')
|
||||
|
||||
''')
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
5841
src/calibre/translations/hr.po
Normal file
5841
src/calibre/translations/hr.po
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -34,7 +34,9 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
||||
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
|
||||
'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
|
||||
'new_york_review_of_books_no_sub', 'politico',
|
||||
'new_york_review_of_books_no_sub', 'politico', 'adventuregamers',
|
||||
'mondedurable', 'instapaper', 'dnevnik_cro', 'vecernji_list',
|
||||
'nacional_cro', '24sata',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
57
src/calibre/web/feeds/recipes/recipe_24sata.py
Normal file
57
src/calibre/web/feeds/recipes/recipe_24sata.py
Normal file
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
24sata.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Cro24Sata(BasicNewsRecipe):
    """News recipe for the Croatian portal 24sata.hr.

    Articles come from the single "Najnovije Vijesti" RSS feed and are
    fetched through the URL returned by :meth:`print_version`.
    """

    title = '24 Sata - Hr'
    __author__ = 'Darko Miletic'
    description = "News Portal from Croatia"
    publisher = '24sata.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    language = _('Croatian')

    # Points the body text at a font file addressed through a res:// URL.
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    # Metadata forwarded to the LRF converter as command-line style options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # The same metadata for the EPUB converter, one key="value" pair per line.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    # Replace U+0110 (capital D with stroke) by U+00D0 (capital Eth).
    # NOTE(review): presumably a glyph-availability workaround - confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [
        dict(name=['object', 'link', 'embed']),
        dict(name='table', attrs={'class': 'enumbox'}),
    ]

    feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]

    def preprocess_html(self, soup):
        """Mark the document as Croatian UTF-8 and drop inline styles.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        soup.html['lang'] = 'hr-HR'
        # Both meta tags are self-closed so the injected markup stays
        # well-formed; the Content-Type tag used to be left unclosed.
        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        """Return the article URL with the ``action=ispis`` parameter appended."""
        return url + '&action=ispis'
|
||||
|
75
src/calibre/web/feeds/recipes/recipe_adventuregamers.py
Normal file
75
src/calibre/web/feeds/recipes/recipe_adventuregamers.py
Normal file
@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.adventuregamers.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdventureGamers(BasicNewsRecipe):
    """News recipe for www.adventuregamers.com.

    Multi-page articles are stitched together: :meth:`preprocess_html`
    calls :meth:`append_page`, which follows the "next" pager link and
    splices the text of each following page into the first one.
    """

    title = u'Adventure Gamers'
    # The original assigned ``language`` twice with the same value; once is enough.
    language = _('English')
    __author__ = 'Darko Miletic'
    description = 'Adventure games portal'
    publisher = 'Adventure Gamers'
    category = 'news, games, adventure, technology'
    oldest_article = 10
    delay = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    remove_javascript = True
    use_embedded_content = False
    # Site root; pager links are appended to it in append_page().
    INDEX = u'http://www.adventuregamers.com'

    # Metadata forwarded to the LRF converter as command-line style options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # The same metadata for the EPUB converter, one key="value" pair per line.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [
        dict(name='div', attrs={'class': 'content_middle'}),
    ]

    remove_tags = [
        dict(name=['object', 'link', 'embed', 'form']),
        dict(name='div', attrs={'class': ['related-stories', 'article_leadout', 'prev', 'next', 'both']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'toolbar_fat'})]

    feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]

    def get_article_url(self, article):
        """Return the feed entry's ``guid`` (or None when it has none)."""
        return article.get('guid', None)

    def append_page(self, soup, appendtag, position):
        """Recursively pull in the following pages of a multi-page article.

        :param soup: the page whose "next" pager link is followed.
        :param appendtag: tag that receives the next page's text block.
        :param position: child index in ``appendtag`` at which to insert.

        Does nothing when the page has no usable "next" link or when the
        next page lacks the ``bodytext`` div; the original raised on those
        pages because it dereferenced None.
        """
        pager = soup.find('div', attrs={'class': 'toolbar_fat_next'})
        if not pager:
            return
        link = pager.a
        if link is None or not link.has_key('href'):
            return
        nexturl = self.INDEX + link['href']
        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class': 'bodytext'})
        if texttag is None:
            return
        for it in texttag.findAll(style=True):
            del it['style']
        # Chain any further pages onto the end of this page's text first,
        # then move the combined block into the caller's document.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Add language/charset meta tags, strip inline styles, merge pages.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        # The pager toolbar is only needed to find the next page.
        pager = soup.find('div', attrs={'class': 'toolbar_fat'})
        if pager:
            pager.extract()
        return soup
|
60
src/calibre/web/feeds/recipes/recipe_dnevnik_cro.py
Normal file
60
src/calibre/web/feeds/recipes/recipe_dnevnik_cro.py
Normal file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
dnevnik.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class DnevnikCro(BasicNewsRecipe):
    """News recipe for the Croatian portal dnevnik.hr."""

    title = 'Dnevnik - Hr'
    __author__ = 'Darko Miletic'
    description = "Vijesti iz Hrvatske"
    publisher = 'Dnevnik.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    language = _('Croatian')

    # Points the body text at a font file addressed through a res:// URL.
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    # Metadata and table handling forwarded to the LRF converter.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    # The same settings for the EPUB converter, one key=value pair per line.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    # Replace U+0110 (capital D with stroke) by U+00D0 (capital Eth).
    # NOTE(review): presumably a glyph-availability workaround - confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]

    remove_tags = [
        dict(name=['object', 'link', 'embed']),
        dict(name='div', attrs={'class': 'menu'}),
        dict(name='div', attrs={'id': 'video'}),
    ]

    remove_tags_after = dict(name='div', attrs={'id': 'content'})

    feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]

    def preprocess_html(self, soup):
        """Mark the document as Croatian UTF-8 and drop inline styles.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        soup.html['lang'] = 'hr-HR'
        # Both meta tags are self-closed so the injected markup stays
        # well-formed; the Content-Type tag used to be left unclosed.
        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
||||
|
74
src/calibre/web/feeds/recipes/recipe_instapaper.py
Normal file
74
src/calibre/web/feeds/recipes/recipe_instapaper.py
Normal file
@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.instapaper.com
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Instapaper(BasicNewsRecipe):
    """Recipe that downloads a user's saved articles from instapaper.com.

    Requires a login (``needs_subscription``). The "feeds" are HTML index
    pages rather than RSS, so :meth:`parse_index` scrapes them directly.
    """

    title = 'Instapaper.com'
    __author__ = 'Darko Miletic'
    # Fixed typo in the user-visible text ("setup up" -> "set up").
    description = 'Personalized news feeds. Go to instapaper.com to set up your news.'
    publisher = 'Instapaper.com'
    category = 'news, custom'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    needs_subscription = True
    INDEX = u'http://www.instapaper.com'
    LOGIN = INDEX + u'/user/login'

    # Metadata forwarded to the LRF converter as command-line style options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # Metadata plus a CSS override for the EPUB converter.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'

    # (title, index page URL) pairs scraped by parse_index().
    feeds = [
        (u'Unread articles', INDEX + u'/u'),
        (u'Starred articles', INDEX + u'/starred'),
    ]

    def get_browser(self):
        """Return a browser, logged in when a username is configured.

        The first form on the login page is filled with ``self.username``
        and, if set, ``self.password``, then submitted.
        """
        # NOTE(review): called on the class without an instance; this only
        # works if BasicNewsRecipe.get_browser is a class/static method -
        # confirm against the base class.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['username'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Scrape every index page in ``feeds`` into article lists.

        :return: list of ``(feed title, articles)`` tuples, where each
            article is a dict with ``title``, ``date``, ``url`` and
            ``description`` keys. Feeds with no articles are still
            included, with an empty list.
        """
        totalfeeds = []
        # Every article gets the download date; compute it once.
        date = strftime(self.timefmt)
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class': 'titleRow'}):
                summary = self.tag_to_string(item.div)
                atag = item.a
                # has_key() tests the tag's attributes; rows without a
                # link target are skipped.
                if atag and atag.has_key('href'):
                    articles.append({
                        'title': self.tag_to_string(atag),
                        'date': date,
                        # Appending '/text' selects the URL variant this
                        # recipe downloads for each article.
                        'url': self.INDEX + atag['href'] + '/text',
                        'description': summary,
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||
|
45
src/calibre/web/feeds/recipes/recipe_mondedurable.py
Normal file
45
src/calibre/web/feeds/recipes/recipe_mondedurable.py
Normal file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
mondedurable.science-et-vie.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdventureGamers(BasicNewsRecipe):
    """News recipe for mondedurable.science-et-vie.com.

    NOTE(review): the class name matches the one used by the Adventure
    Gamers recipe and looks like a copy-paste leftover; it is kept as-is
    so the public name does not change.
    """

    title = 'Monde durable'
    language = _('French')
    __author__ = 'Darko Miletic'
    description = 'science news'
    publisher = 'Monde durable'
    category = 'environnement, developpement durable, science & vie, science et vie'
    oldest_article = 30
    delay = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    remove_javascript = True
    use_embedded_content = False

    # Metadata forwarded to the LRF converter as command-line style options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # The same metadata for the EPUB converter, one key="value" pair per line.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]

    remove_tags = [dict(name=['object', 'link', 'embed', 'form', 'img'])]

    feeds = [(u'Articles', u'http://mondedurable.science-et-vie.com/feed/')]

    def preprocess_html(self, soup):
        """Declare the page as UTF-8 and remove every inline ``style``.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        charset_meta = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, charset_meta)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
|
55
src/calibre/web/feeds/recipes/recipe_nacional_cro.py
Normal file
55
src/calibre/web/feeds/recipes/recipe_nacional_cro.py
Normal file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
nacional.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class NacionalCro(BasicNewsRecipe):
    """News recipe for the Croatian portal nacional.hr."""

    title = 'Nacional - Hr'
    __author__ = 'Darko Miletic'
    description = "news from Croatia"
    publisher = 'Nacional.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    language = _('Croatian')

    # Points the body text at a font file addressed through a res:// URL.
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    # Metadata forwarded to the LRF converter as command-line style options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # The same metadata for the EPUB converter, one key="value" pair per line.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    # Replace U+0110 (capital D with stroke) by U+00D0 (capital Eth).
    # NOTE(review): presumably a glyph-availability workaround - confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [dict(name=['object', 'link', 'embed'])]

    feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]

    def preprocess_html(self, soup):
        """Mark the document as Croatian UTF-8 and drop inline styles.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        soup.html['lang'] = 'hr-HR'
        # Both meta tags are self-closed so the injected markup stays
        # well-formed; the Content-Type tag used to be left unclosed.
        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        """Map an article URL to its print URL.

        Everything after the last ``/`` is dropped and ``/clanak/`` is
        rewritten to ``/clanak/print/``.
        """
        rest = url.rpartition('/')[0]
        return rest.replace('/clanak/','/clanak/print/')
|
||||
|
@ -14,9 +14,10 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
description = 'News from Germany'
|
||||
__author__ = 'Oliver Niesner'
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 40
|
||||
language = _('German')
|
||||
timefmt = ' [%d %b %Y]'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50
|
||||
no_stylesheets = True
|
||||
encoding = 'latin1'
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'artikelBox navigatorBox'})]
|
||||
@ -27,6 +28,7 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'footerLinks'}),
|
||||
dict(name='div', attrs={'class':'seitenanfang'}),
|
||||
dict(name='td', attrs={'class':'mar5'}),
|
||||
dict(name='a', attrs={'class':'top'}),
|
||||
dict(name='table', attrs={'class':'pageAktiv'}),
|
||||
dict(name='table', attrs={'class':'xartable'}),
|
||||
dict(name='table', attrs={'class':'wpnavi'}),
|
||||
@ -39,8 +41,20 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'similar-article-box'}),
|
||||
dict(name='div', attrs={'class':'videoBigHack'}),
|
||||
dict(name='td', attrs={'class':'artikelDruckenRight'}),
|
||||
dict(name='td', attrs={'class':'stoerBSbgUnten'}),
|
||||
dict(name='li', attrs={'class':'first'}),
|
||||
dict(name='li', attrs={'class':'bookmark closed'}),
|
||||
dict(name='li', attrs={'class':'print'}),
|
||||
dict(name='li', attrs={'class':'mail'}),
|
||||
dict(name='li', attrs={'class':'last'}),
|
||||
dict(name='li', attrs={'class':'tiefethemen'}),
|
||||
dict(name='li', attrs={'class':'prev'}),
|
||||
dict(name='ul', attrs={'class':'activities'}),
|
||||
dict(name='li', attrs={'class':'next'}),
|
||||
dict(name='span', attrs={'class':'hidePrint'}),
|
||||
dict(id='headerLBox'),
|
||||
dict(id='bookmarklist1'),
|
||||
dict(id='bookmarklist2'),
|
||||
dict(id='rechteSpalte'),
|
||||
dict(id='newsticker-list-small'),
|
||||
dict(id='ntop5'),
|
||||
@ -57,7 +71,4 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
|
||||
feeds = [ (u'Sueddeutsche', u'http://www.sueddeutsche.de/app/service/rss/alles/rss.xml') ]
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
for t in soup.findAll(['table', 'tr', 'td']):
|
||||
t.name = 'div'
|
||||
return soup
|
||||
|
||||
|
58
src/calibre/web/feeds/recipes/recipe_vecernji_list.py
Normal file
58
src/calibre/web/feeds/recipes/recipe_vecernji_list.py
Normal file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
www.vecernji.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class VecernjiList(BasicNewsRecipe):
    """News recipe for the Croatian portal www.vecernji.hr."""

    title = 'Vecernji List'
    __author__ = 'Darko Miletic'
    description = "Vecernji.hr je vodeci hrvatski news portal. Cilj je biti prvi u objavljivanju svih vijesti iz Hrvatske, svijeta, sporta, gospodarstva, showbiza i jos mnogo vise. Uz cjelodnevni rad, novinari objavljuju preko 300 raznih vijesti svakoga dana. Vecernji.hr prati sve vaznije dogadaje specijalnim izvjestajima, video specijalima i foto galerijama."
    publisher = 'Vecernji.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    language = _('Croatian')

    # Points the body text at a font file addressed through a res:// URL.
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    # Metadata and table handling forwarded to the LRF converter.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    # The same settings for the EPUB converter, one key=value pair per line.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    # Replace U+0110 (capital D with stroke) by U+00D0 (capital Eth).
    # NOTE(review): presumably a glyph-availability workaround - confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [
        dict(name=['object', 'link', 'embed']),
        dict(name='table', attrs={'class': 'enumbox'}),
    ]

    feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]

    def preprocess_html(self, soup):
        """Mark the document as Croatian UTF-8 and drop inline styles.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        soup.html['lang'] = 'hr-HR'
        # Both meta tags are self-closed so the injected markup stays
        # well-formed; the Content-Type tag used to be left unclosed.
        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        """Return the print URL by swapping ``/index.do`` for ``/print.do``."""
        return url.replace('/index.do','/print.do')
|
||||
|
4
todo
4
todo
@ -8,3 +8,7 @@
|
||||
* Rationalize books table. Add a pubdate column, remove the uri column (and associated support in add_books) and convert series_index to a float.
|
||||
|
||||
* Replace single application stuff with Listener from multiprocessing
|
||||
|
||||
* Refactor add books to use a separate process named calibre-worker-add
|
||||
- Don't use the process for adding a single book
|
||||
- Use a process pool for speed
|
||||
|
Loading…
x
Reference in New Issue
Block a user