mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit 0774cec024
Sync to trunk.

@@ -1,7 +1,7 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008 - 2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = 'Copyright 2011 Starson17'
'''
engadget.com
'''

@@ -9,14 +9,29 @@ engadget.com
from calibre.web.feeds.news import BasicNewsRecipe

class Engadget(BasicNewsRecipe):
    title = u'Engadget'
    __author__ = 'Darko Miletic'
    title = u'Engadget_Full'
    __author__ = 'Starson17'
    __version__ = 'v1.00'
    __date__ = '02, July 2011'
    description = 'Tech news'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    use_embedded_content = False
    remove_javascript = True
    remove_empty_feeds = True

    feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')]
    keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
    remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
    remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]

    feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
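
The updated recipe keys on exact class strings in Engadget's post markup (note the trailing space in 'post_content permalink '). A rough standalone illustration of what such a tag spec selects, using BeautifulSoup 3 as calibre recipes of this era did; the HTML snippet is invented for the example and this is not calibre's own fetch pipeline:

from BeautifulSoup import BeautifulSoup

html = '''<div class="sidebar">ads</div>
<div class="post_content permalink alt-post-full">
  <h1>Post title</h1><p>Post body</p>
  <div class="filed_under">Filed under: stuff</div>
  <div class="post_footer">footer</div>
</div>'''

soup = BeautifulSoup(html)
# keep_only_tags: keep only the post container(s)
posts = soup.findAll('div', attrs={'class': ['post_content permalink ',
                                             'post_content permalink alt-post-full']})
# remove_tags: drop boilerplate inside the kept container
for post in posts:
    for tag in post.findAll('div', attrs={'class': ['filed_under', 'post_footer']}):
        tag.extract()
print posts[0].find('p').string  # Post body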

recipes/scmp.recipe (new file, 80 lines)
@@ -0,0 +1,80 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
scmp.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class SCMP(BasicNewsRecipe):
    title = 'South China Morning Post'
    __author__ = 'llam'
    description = "SCMP.com, Hong Kong's premier online English daily provides exclusive up-to-date news, audio video news, podcasts, RSS Feeds, Blogs, breaking news, top stories, award winning news and analysis on Hong Kong and China."
    publisher = 'South China Morning Post Publishers Ltd.'
    category = 'SCMP, Online news, Hong Kong News, China news, Business news, English newspaper, daily newspaper, Lifestyle news, Sport news, Audio Video news, Asia news, World news, economy news, investor relations news, RSS Feeds'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en_CN'
    remove_empty_feeds = True
    needs_subscription = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.scmp.com/images/logo_scmp_home.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '

    conversion_options = {
          'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        #br.set_debug_http(True)
        #br.set_debug_responses(True)
        #br.set_debug_redirects(True)
        if self.username is not None and self.password is not None:
            br.open('http://www.scmp.com/portal/site/SCMP/')
            br.select_form(name='loginForm')
            br['Login' ] = self.username
            br['Password'] = self.password
            br.submit()
        return br

    remove_attributes=['width','height','border']

    keep_only_tags = [
         dict(attrs={'id':['ART','photoBox']})
        ,dict(attrs={'class':['article_label','article_byline','article_body']})
    ]

    preprocess_regexps = [
        (re.compile(r'<P><table((?!<table).)*class="embscreen"((?!</table>).)*</table>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    feeds = [
         (u'Business' , u'http://www.scmp.com/rss/business.xml' )
        ,(u'Hong Kong' , u'http://www.scmp.com/rss/hong_kong.xml' )
        ,(u'China' , u'http://www.scmp.com/rss/china.xml' )
        ,(u'Asia & World' , u'http://www.scmp.com/rss/news_asia_world.xml')
        ,(u'Opinion' , u'http://www.scmp.com/rss/opinion.xml' )
        ,(u'LifeSTYLE' , u'http://www.scmp.com/rss/lifestyle.xml' )
        ,(u'Sport' , u'http://www.scmp.com/rss/sport.xml' )
    ]

    def print_version(self, url):
        rpart, sep, rest = url.rpartition('&')
        return rpart #+ sep + urllib.quote_plus(rest)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        items = soup.findAll(src="/images/label_icon.gif")
        [item.extract() for item in items]
        return self.adeify_images(soup)
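
The print_version() above keeps everything before the last '&' in the article URL (the commented-out tail would re-append the final query value). A quick standalone illustration of str.rpartition, with an invented URL:

url = 'http://www.scmp.com/portal/site/SCMP/menuitem.print?vgnextoid=abc123&ss=World'  # invented example
rpart, sep, rest = url.rpartition('&')
print rpart  # http://www.scmp.com/portal/site/SCMP/menuitem.print?vgnextoid=abc123
print sep    # &
print rest   # ss=World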

@@ -22,7 +22,7 @@ from calibre.library.field_metadata import FieldMetadata
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.utils.icu import strcmp
from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp
from calibre.utils.date import utcfromtimestamp, parse_date
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
        SizeTable, FormatsTable, AuthorsTable, IdentifiersTable)
# }}}
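
The broadened date import is used by the new custom-column code further down, which feeds user-entered strings through parse_date. The call, with the same keyword arguments as in adapt_datetime() below and an arbitrary example string:

from calibre.utils.date import parse_date

dt = parse_date('2011-07-02', assume_utc=False, as_utc=False)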

@@ -248,7 +248,10 @@ class DB(object, SchemaUpgrade):
        UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
        ''')

    def initialize_prefs(self, default_prefs):
        self.initialize_custom_columns()
        self.initialize_tables()

    def initialize_prefs(self, default_prefs): # {{{
        self.prefs = DBPrefs(self)

        if default_prefs is not None and not self._exists:

@@ -339,6 +342,222 @@ class DB(object, SchemaUpgrade):
                        cats_changed = True
        if cats_changed:
            self.prefs.set('user_categories', user_cats)
    # }}}

    def initialize_custom_columns(self): # {{{
        with self.conn:
            # Delete previously marked custom columns
            for record in self.conn.get(
                    'SELECT id FROM custom_columns WHERE mark_for_delete=1'):
                num = record[0]
                table, lt = self.custom_table_names(num)
                self.conn.execute('''\
                        DROP INDEX IF EXISTS {table}_idx;
                        DROP INDEX IF EXISTS {lt}_aidx;
                        DROP INDEX IF EXISTS {lt}_bidx;
                        DROP TRIGGER IF EXISTS fkc_update_{lt}_a;
                        DROP TRIGGER IF EXISTS fkc_update_{lt}_b;
                        DROP TRIGGER IF EXISTS fkc_insert_{lt};
                        DROP TRIGGER IF EXISTS fkc_delete_{lt};
                        DROP TRIGGER IF EXISTS fkc_insert_{table};
                        DROP TRIGGER IF EXISTS fkc_delete_{table};
                        DROP VIEW IF EXISTS tag_browser_{table};
                        DROP VIEW IF EXISTS tag_browser_filtered_{table};
                        DROP TABLE IF EXISTS {table};
                        DROP TABLE IF EXISTS {lt};
                        '''.format(table=table, lt=lt)
                )
            self.conn.execute('DELETE FROM custom_columns WHERE mark_for_delete=1')

        # Load metadata for custom columns
        self.custom_column_label_map, self.custom_column_num_map = {}, {}
        triggers = []
        remove = []
        custom_tables = self.custom_tables
        for record in self.conn.get(
                'SELECT label,name,datatype,editable,display,normalized,id,is_multiple FROM custom_columns'):
            data = {
                    'label':record[0],
                    'name':record[1],
                    'datatype':record[2],
                    'editable':bool(record[3]),
                    'display':json.loads(record[4]),
                    'normalized':bool(record[5]),
                    'num':record[6],
                    'is_multiple':bool(record[7]),
                    }
            if data['display'] is None:
                data['display'] = {}
            # set up the is_multiple separator dict
            if data['is_multiple']:
                if data['display'].get('is_names', False):
                    seps = {'cache_to_list': '|', 'ui_to_list': '&', 'list_to_ui': ' & '}
                elif data['datatype'] == 'composite':
                    seps = {'cache_to_list': ',', 'ui_to_list': ',', 'list_to_ui': ', '}
                else:
                    seps = {'cache_to_list': '|', 'ui_to_list': ',', 'list_to_ui': ', '}
            else:
                seps = {}
            data['multiple_seps'] = seps

            table, lt = self.custom_table_names(data['num'])
            if table not in custom_tables or (data['normalized'] and lt not in
                    custom_tables):
                remove.append(data)
                continue

            self.custom_column_label_map[data['label']] = data['num']
            self.custom_column_num_map[data['num']] = \
                self.custom_column_label_map[data['label']] = data

            # Create Foreign Key triggers
            if data['normalized']:
                trigger = 'DELETE FROM %s WHERE book=OLD.id;'%lt
            else:
                trigger = 'DELETE FROM %s WHERE book=OLD.id;'%table
            triggers.append(trigger)

        if remove:
            with self.conn:
                for data in remove:
                    prints('WARNING: Custom column %r not found, removing.' %
                            data['label'])
                    self.conn.execute('DELETE FROM custom_columns WHERE id=?',
                            (data['num'],))

        if triggers:
            with self.conn:
                self.conn.execute('''\
                    CREATE TEMP TRIGGER custom_books_delete_trg
                        AFTER DELETE ON books
                        BEGIN
                        %s
                    END;
                    '''%(' \n'.join(triggers)))

        # Setup data adapters
        def adapt_text(x, d):
            if d['is_multiple']:
                if x is None:
                    return []
                if isinstance(x, (str, unicode, bytes)):
                    x = x.split(d['multiple_seps']['ui_to_list'])
                x = [y.strip() for y in x if y.strip()]
                x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
                    unicode) else y for y in x]
                return [u' '.join(y.split()) for y in x]
            else:
                return x if x is None or isinstance(x, unicode) else \
                        x.decode(preferred_encoding, 'replace')

        def adapt_datetime(x, d):
            if isinstance(x, (str, unicode, bytes)):
                x = parse_date(x, assume_utc=False, as_utc=False)
            return x

        def adapt_bool(x, d):
            if isinstance(x, (str, unicode, bytes)):
                x = x.lower()
                if x == 'true':
                    x = True
                elif x == 'false':
                    x = False
                elif x == 'none':
                    x = None
                else:
                    x = bool(int(x))
            return x

        def adapt_enum(x, d):
            v = adapt_text(x, d)
            if not v:
                v = None
            return v

        def adapt_number(x, d):
            if x is None:
                return None
            if isinstance(x, (str, unicode, bytes)):
                if x.lower() == 'none':
                    return None
            if d['datatype'] == 'int':
                return int(x)
            return float(x)

        self.custom_data_adapters = {
                'float': adapt_number,
                'int': adapt_number,
                'rating':lambda x,d : x if x is None else min(10., max(0., float(x))),
                'bool': adapt_bool,
                'comments': lambda x,d: adapt_text(x, {'is_multiple':False}),
                'datetime' : adapt_datetime,
                'text':adapt_text,
                'series':adapt_text,
                'enumeration': adapt_enum
        }

        # Create Tag Browser categories for custom columns
        for k in sorted(self.custom_column_label_map.iterkeys()):
            v = self.custom_column_label_map[k]
            if v['normalized']:
                is_category = True
            else:
                is_category = False
            is_m = v['multiple_seps']
            tn = 'custom_column_{0}'.format(v['num'])
            self.field_metadata.add_custom_field(label=v['label'],
                    table=tn, column='value', datatype=v['datatype'],
                    colnum=v['num'], name=v['name'], display=v['display'],
                    is_multiple=is_m, is_category=is_category,
                    is_editable=v['editable'], is_csp=False)
    # }}}

    def initialize_tables(self): # {{{
        tables = self.tables = {}
        for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
                'timestamp', 'published', 'uuid', 'path', 'cover',
                'last_modified'):
            metadata = self.field_metadata[col].copy()
            if metadata['table'] is None:
                metadata['table'], metadata['column'] = 'books', ('has_cover'
                        if col == 'cover' else col)
            tables[col] = OneToOneTable(col, metadata)

        for col in ('series', 'publisher', 'rating'):
            tables[col] = ManyToOneTable(col, self.field_metadata[col].copy())

        for col in ('authors', 'tags', 'formats', 'identifiers'):
            cls = {
                    'authors':AuthorsTable,
                    'formats':FormatsTable,
                    'identifiers':IdentifiersTable,
                    }.get(col, ManyToManyTable)
            tables[col] = cls(col, self.field_metadata[col].copy())

        tables['size'] = SizeTable('size', self.field_metadata['size'].copy())

        for label, data in self.custom_column_label_map.iteritems():
            metadata = self.field_metadata[label].copy()
            link_table = self.custom_table_names(data['num'])[1]

            if data['normalized']:
                if metadata['is_multiple']:
                    tables[label] = ManyToManyTable(label, metadata,
                            link_table=link_table)
                else:
                    tables[label] = ManyToOneTable(label, metadata,
                            link_table=link_table)
                if metadata['datatype'] == 'series':
                    # Create series index table
                    label += '_index'
                    metadata = self.field_metadata[label].copy()
                    metadata['column'] = 'extra'
                    metadata['table'] = link_table
                    tables[label] = OneToOneTable(label, metadata)
            else:
                tables[label] = OneToOneTable(label, metadata)
    # }}}

    @property
    def conn(self):
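
The custom_data_adapters table above maps each custom-column datatype to a coercion function applied to incoming values. A standalone sketch (Python 2, simplified from the adapters above, with invented sample inputs) of the kind of normalisation they perform:

def adapt_bool(x):
    # simplified copy of the 'bool' adapter above
    if isinstance(x, basestring):
        x = x.lower()
        if x == 'true':
            x = True
        elif x == 'false':
            x = False
        elif x == 'none':
            x = None
        else:
            x = bool(int(x))
    return x

def adapt_number(x, datatype):
    # simplified copy of the 'int'/'float' adapter above
    if x is None or (isinstance(x, basestring) and x.lower() == 'none'):
        return None
    return int(x) if datatype == 'int' else float(x)

print adapt_bool('True'), adapt_bool('0'), adapt_bool(None)    # True False None
print adapt_number('42', 'int'), adapt_number('3.5', 'float')  # 42 3.5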

@@ -372,6 +591,15 @@ class DB(object, SchemaUpgrade):

    # Database layer API {{{

    def custom_table_names(self, num):
        return 'custom_column_%d'%num, 'books_custom_column_%d_link'%num

    @property
    def custom_tables(self):
        return set([x[0] for x in self.conn.get(
            'SELECT name FROM sqlite_master WHERE type="table" AND '
            '(name GLOB "custom_column_*" OR name GLOB "books_custom_column_*")')])

    @classmethod
    def exists_at(cls, path):
        return path and os.path.exists(os.path.join(path, 'metadata.db'))
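
custom_table_names() above pins down the naming scheme linking a custom column's numeric id to its SQLite tables, and custom_tables discovers existing ones via the GLOB patterns. The scheme, reproduced standalone:

def custom_table_names(num):
    # same string formatting as the method above
    return 'custom_column_%d'%num, 'books_custom_column_%d_link'%num

print custom_table_names(3)  # ('custom_column_3', 'books_custom_column_3_link')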

@@ -405,39 +633,18 @@ class DB(object, SchemaUpgrade):
        return utcfromtimestamp(os.stat(self.dbpath).st_mtime)

    def read_tables(self):
        tables = {}
        for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
                'timestamp', 'published', 'uuid', 'path', 'cover',
                'last_modified'):
            metadata = self.field_metadata[col].copy()
            if metadata['table'] is None:
                metadata['table'], metadata['column'] == 'books', ('has_cover'
                        if col == 'cover' else col)
            tables[col] = OneToOneTable(col, metadata)

        for col in ('series', 'publisher', 'rating'):
            tables[col] = ManyToOneTable(col, self.field_metadata[col].copy())

        for col in ('authors', 'tags', 'formats', 'identifiers'):
            cls = {
                    'authors':AuthorsTable,
                    'formats':FormatsTable,
                    'identifiers':IdentifiersTable,
                    }.get(col, ManyToManyTable)
            tables[col] = cls(col, self.field_metadata[col].copy())

        tables['size'] = SizeTable('size', self.field_metadata['size'].copy())
        '''
        Read all data from the db into the python in-memory tables
        '''

        with self.conn: # Use a single transaction, to ensure nothing modifies
                        # the db while we are reading
            for table in tables.itervalues():
            for table in self.tables.itervalues():
                try:
                    table.read()
                except:
                    prints('Failed to read table:', table.name)
                    raise

        return tables

    # }}}

@@ -32,7 +32,7 @@ def _c_convert_timestamp(val):

class Table(object):

    def __init__(self, name, metadata):
    def __init__(self, name, metadata, link_table=None):
        self.name, self.metadata = name, metadata

        # self.adapt() maps values from the db to python objects

@@ -46,6 +46,9 @@ class Table(object):
            # Legacy
            self.adapt = lambda x: x.replace('|', ',') if x else None

        self.link_table = (link_table if link_table else
                'books_%s_link'%self.metadata['table'])

class OneToOneTable(Table):

    '''

@@ -95,8 +98,8 @@ class ManyToOneTable(Table):

    def read_maps(self, db):
        for row in db.conn.execute(
                'SELECT book, {0} FROM books_{1}_link'.format(
                    self.metadata['link_column'], self.metadata['table'])):
                'SELECT book, {0} FROM {1}'.format(
                    self.metadata['link_column'], self.link_table)):
            if row[1] not in self.col_book_map:
                self.col_book_map[row[1]] = []
            self.col_book_map[row[1]].append(row[0])

@@ -112,8 +115,8 @@ class ManyToManyTable(ManyToOneTable):

    def read_maps(self, db):
        for row in db.conn.execute(
                'SELECT book, {0} FROM books_{1}_link'.format(
                    self.metadata['link_column'], self.metadata['table'])):
                'SELECT book, {0} FROM {1}'.format(
                    self.metadata['link_column'], self.link_table)):
            if row[1] not in self.col_book_map:
                self.col_book_map[row[1]] = []
            self.col_book_map[row[1]].append(row[0])
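
Both read_maps() implementations above build a value-to-books map from the (book, value) rows returned by the link-table query. A standalone sketch with a hand-made row list in place of a database cursor:

rows = [(1, 10), (2, 10), (3, 11)]  # invented (book_id, item_id) pairs
col_book_map = {}
for book, item in rows:
    if item not in col_book_map:
        col_book_map[item] = []
    col_book_map[item].append(book)
print col_book_map  # {10: [1, 2], 11: [3]}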