mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
a5baa3c1c6
@ -11,7 +11,7 @@
|
||||
- title: "Page turn animations in the e-book viewer"
|
||||
type: major
|
||||
description: >
|
||||
"Now when you use the Page Down/Page Up keys or the next/previous page buttons in the viewer, page turning will be animated. The duration of the animation can be controlled in the viewer preferences. Setting it to o disables the animation completely."
|
||||
"Now when you use the Page Down/Page Up keys or the next/previous page buttons in the viewer, page turning will be animated. The duration of the animation can be controlled in the viewer preferences. Setting it to 0 disables the animation completely."
|
||||
|
||||
- title: "Conversion pipeline: Add an option to set the minimum line height of all elements as a percentage of the computed font size. By default, calibre now sets the line height to 120% of the computed font size."
|
||||
|
||||
|
@ -41,6 +41,20 @@ series_index_auto_increment = 'next'
|
||||
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
|
||||
author_sort_copy_method = 'invert'
|
||||
|
||||
# Set which author field to display in the tags pane (the list of authors,
|
||||
# series, publishers etc on the left hand side). The choices are author and
|
||||
# author_sort. This tweak affects only the tags pane, and only what is displayed
|
||||
# under the authors category. Please note that if you set this to author_sort,
|
||||
# it is very possible to see duplicate names in the list because although it is
|
||||
# guaranteed that author names are unique, there is no such guarantee for
|
||||
# author_sort values. Showing duplicates won't break anything, but it could
|
||||
# lead to some confusion. When using 'author_sort', the tooltip will show the
|
||||
# author's name.
|
||||
# Examples:
|
||||
# tags_pane_use_field_for_author_name = 'author'
|
||||
# tags_pane_use_field_for_author_name = 'author_sort'
|
||||
tags_pane_use_field_for_author_name = 'author'
|
||||
|
||||
|
||||
# Set whether boolean custom columns are two- or three-valued.
|
||||
# Two-values for true booleans
|
||||
|
@ -12,16 +12,6 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
|
||||
no_stylesheets = True
|
||||
#remove_tags_before = dict(name='div', attrs={'align':'center'})
|
||||
#remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':'subContent'}),
|
||||
dict(name='div', attrs={'id':['contentFrame']}),
|
||||
#dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
|
||||
#dict(name='table', attrs={'align':'RIGHT'}),
|
||||
]
|
||||
|
||||
|
||||
|
||||
# TO LOGIN
|
||||
@ -39,47 +29,47 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
|
||||
|
||||
#TO GET ARTICLE TOC
|
||||
def johm_get_index(self):
|
||||
return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
|
||||
return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')
|
||||
|
||||
# To parse article toc
|
||||
def parse_index(self):
|
||||
parse_soup = self.johm_get_index()
|
||||
parse_soup = self.johm_get_index()
|
||||
|
||||
div = parse_soup.find(id='contentCell')
|
||||
div = parse_soup.find(id='contentCell')
|
||||
|
||||
current_section = None
|
||||
current_articles = []
|
||||
feeds = []
|
||||
for x in div.findAll(True):
|
||||
if x.name == 'h4':
|
||||
# Section heading found
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = self.tag_to_string(x)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
if current_section is not None and x.name == 'strong':
|
||||
title = self.tag_to_string(x)
|
||||
p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
|
||||
if p is None:
|
||||
continue
|
||||
url = p.get('href', False)
|
||||
if not url or not title:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://www3.interscience.wiley.com'+url
|
||||
url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
#if url.startswith('/'):
|
||||
#url = 'http://online.wsj.com'+url
|
||||
current_articles.append({'title': title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
current_section = None
|
||||
current_articles = []
|
||||
feeds = []
|
||||
for x in div.findAll(True):
|
||||
if x.name == 'h4':
|
||||
# Section heading found
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = self.tag_to_string(x)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
if current_section is not None and x.name == 'strong':
|
||||
title = self.tag_to_string(x)
|
||||
p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
|
||||
if p is None:
|
||||
continue
|
||||
url = p.get('href', False)
|
||||
if not url or not title:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://www3.interscience.wiley.com'+url
|
||||
url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
#if url.startswith('/'):
|
||||
#url = 'http://online.wsj.com'+url
|
||||
current_articles.append({'title': title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
return feeds
|
||||
return feeds
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for img in soup.findAll('img', src=True):
|
||||
|
@ -11,16 +11,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
|
||||
no_stylesheets = True
|
||||
remove_tags_before = dict(name='div', attrs={'align':'center'})
|
||||
remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
#dict(name='div', attrs={'class':'related-articles'}),
|
||||
dict(name='div', attrs={'id':['sidebar']}),
|
||||
#dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
|
||||
dict(name='table', attrs={'align':'RIGHT'}),
|
||||
]
|
||||
|
||||
keep_only_tags = dict(id='content')
|
||||
|
||||
|
||||
#TO LOGIN
|
||||
@ -38,61 +29,50 @@ class NYTimes(BasicNewsRecipe):
|
||||
|
||||
#TO GET ARTICLE TOC
|
||||
def nejm_get_index(self):
|
||||
return self.index_to_soup('http://content.nejm.org/current.dtl')
|
||||
return self.index_to_soup('http://content.nejm.org/current.dtl')
|
||||
|
||||
# To parse article toc
|
||||
def parse_index(self):
|
||||
parse_soup = self.nejm_get_index()
|
||||
parse_soup = self.nejm_get_index()
|
||||
|
||||
div = parse_soup.find(id='centerTOC')
|
||||
feeds = []
|
||||
|
||||
current_section = None
|
||||
current_articles = []
|
||||
feeds = []
|
||||
for x in div.findAll(True):
|
||||
if x.name == 'img' and '/toc/' in x.get('src', '') and 'uarrow.gif' not in x.get('src', ''):
|
||||
# Section heading found
|
||||
if current_articles and current_section and 'Week in the' not in current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_section = x.get('alt')
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
if current_section is not None and x.name == 'strong':
|
||||
title = self.tag_to_string(x)
|
||||
a = x.parent.find('a', href=lambda x: x and '/full/' in x)
|
||||
if a is None:
|
||||
continue
|
||||
url = a.get('href', False)
|
||||
if not url or not title:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://content.nejm.org'+url
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
if url.startswith('/'):
|
||||
url = 'http://online.wsj.com'+url
|
||||
current_articles.append({'title': title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
return feeds
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup.findAll(text=lambda x: x and '[in this window]' in x):
|
||||
a = a.findParent('a')
|
||||
url = a.get('href', None)
|
||||
if not url:
|
||||
div = parse_soup.find(attrs={'class':'tocContent'})
|
||||
for group in div.findAll(attrs={'class':'articleGrouping'}):
|
||||
feed_title = group.find(attrs={'class':'articleType'})
|
||||
if feed_title is None:
|
||||
continue
|
||||
if url.startswith('/'):
|
||||
url = 'http://content.nejm.org'+url
|
||||
isoup = self.index_to_soup(url)
|
||||
img = isoup.find('img', src=lambda x: x and
|
||||
x.startswith('/content/'))
|
||||
if img is not None:
|
||||
img.extract()
|
||||
table = a.findParent('table')
|
||||
table.replaceWith(img)
|
||||
return soup
|
||||
feed_title = self.tag_to_string(feed_title)
|
||||
articles = []
|
||||
self.log('Found section:', feed_title)
|
||||
for art in group.findAll(attrs={'class':lambda x: x and 'articleEntry'
|
||||
in x}):
|
||||
link = art.find(attrs={'class':lambda x:x and 'articleLink' in
|
||||
x})
|
||||
if link is None:
|
||||
continue
|
||||
a = link.find('a', href=True)
|
||||
if a is None:
|
||||
continue
|
||||
url = a.get('href')
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.nejm.org'+url
|
||||
title = self.tag_to_string(a)
|
||||
self.log.info('\tFound article:', title, 'at', url)
|
||||
article = {'title':title, 'url':url, 'date':''}
|
||||
au = art.find(attrs={'class':'articleAuthors'})
|
||||
if au is not None:
|
||||
article['author'] = self.tag_to_string(au)
|
||||
desc = art.find(attrs={'class':'hover_text'})
|
||||
if desc is not None:
|
||||
desc = self.tag_to_string(desc)
|
||||
if 'author' in article:
|
||||
desc = ' by ' + article['author'] + ' ' +desc
|
||||
article['description'] = desc
|
||||
articles.append(article)
|
||||
if articles:
|
||||
feeds.append((feed_title, articles))
|
||||
|
||||
return feeds
|
||||
|
||||
|
||||
|
@ -62,9 +62,9 @@ class SWEEX(USBMS):
|
||||
# Ordered list of supported formats
|
||||
FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
|
||||
|
||||
VENDOR_ID = [0x0525]
|
||||
PRODUCT_ID = [0xa4a5]
|
||||
BCD = [0x0319]
|
||||
VENDOR_ID = [0x0525, 0x177f]
|
||||
PRODUCT_ID = [0xa4a5, 0x300]
|
||||
BCD = [0x0319, 0x110]
|
||||
|
||||
VENDOR_NAME = 'SWEEX'
|
||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
|
||||
|
@ -18,6 +18,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, \
|
||||
from calibre.ebooks.metadata import title_sort
|
||||
from calibre.gui2 import config, NONE
|
||||
from calibre.library.field_metadata import TagsIcons, category_icon_map
|
||||
from calibre.utils.config import tweaks
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.utils.search_query_parser import saved_searches
|
||||
from calibre.gui2 import error_dialog
|
||||
@ -409,17 +410,31 @@ class TagTreeItem(object): # {{{
|
||||
return NONE
|
||||
|
||||
def tag_data(self, role):
|
||||
tag = self.tag
|
||||
if tag.category == 'authors' and \
|
||||
tweaks['tags_pane_use_field_for_author_name'] == 'author_sort':
|
||||
name = tag.sort
|
||||
tt_author = True
|
||||
else:
|
||||
name = tag.name
|
||||
tt_author = False
|
||||
if role == Qt.DisplayRole:
|
||||
if self.tag.count == 0:
|
||||
return QVariant('%s'%(self.tag.name))
|
||||
if tag.count == 0:
|
||||
return QVariant('%s'%(name))
|
||||
else:
|
||||
return QVariant('[%d] %s'%(self.tag.count, self.tag.name))
|
||||
return QVariant('[%d] %s'%(tag.count, name))
|
||||
if role == Qt.EditRole:
|
||||
return QVariant(self.tag.name)
|
||||
return QVariant(tag.name)
|
||||
if role == Qt.DecorationRole:
|
||||
return self.icon_state_map[self.tag.state]
|
||||
if role == Qt.ToolTipRole and self.tag.tooltip is not None:
|
||||
return QVariant(self.tag.tooltip)
|
||||
return self.icon_state_map[tag.state]
|
||||
if role == Qt.ToolTipRole:
|
||||
if tt_author:
|
||||
if tag.tooltip is not None:
|
||||
return QVariant('(%s) %s'%(tag.name, tag.tooltip))
|
||||
else:
|
||||
return QVariant(tag.name)
|
||||
if tag.tooltip is not None:
|
||||
return QVariant(tag.tooltip)
|
||||
return NONE
|
||||
|
||||
def toggle(self):
|
||||
|
@ -1128,6 +1128,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
for l in list:
|
||||
(id, val, sort_val) = (l[0], l[1], l[2])
|
||||
tids[category][val] = (id, sort_val)
|
||||
elif cat['datatype'] == 'rating':
|
||||
for l in list:
|
||||
(id, val) = (l[0], l[1])
|
||||
tids[category][val] = (id, '{0:05.2f}'.format(val))
|
||||
else:
|
||||
for l in list:
|
||||
(id, val) = (l[0], l[1])
|
||||
@ -1256,12 +1260,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
|
||||
# sort the list
|
||||
if sort == 'name':
|
||||
def get_sort_key(x):
|
||||
sk = x.s
|
||||
if isinstance(sk, unicode):
|
||||
sk = sort_key(sk)
|
||||
return sk
|
||||
kf = get_sort_key
|
||||
kf = lambda x :sort_key(x.s)
|
||||
reverse=False
|
||||
elif sort == 'popularity':
|
||||
kf = lambda x: x.c
|
||||
@ -1967,7 +1966,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
|
||||
@classmethod
|
||||
def cleanup_tags(cls, tags):
|
||||
tags = [x.strip() for x in tags if x.strip()]
|
||||
tags = [x.strip().replace(',', ';') for x in tags if x.strip()]
|
||||
tags = [x.decode(preferred_encoding, 'replace') \
|
||||
if isbytestring(x) else x for x in tags]
|
||||
tags = [u' '.join(x.split()) for x in tags]
|
||||
|
@ -427,7 +427,9 @@ class SchemaUpgrade(object):
|
||||
|
||||
def upgrade_version_15(self):
|
||||
'Remove commas from tags'
|
||||
self.conn.execute("UPDATE tags SET name=REPLACE(name, ',', ';')")
|
||||
self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';')")
|
||||
self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';;')")
|
||||
self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', '')")
|
||||
|
||||
def upgrade_version_16(self):
|
||||
self.conn.executescript('''
|
||||
|
@ -19,7 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx
|
||||
from calibre.library.comments import comments_to_html
|
||||
from calibre.library.server import custom_fields_to_display
|
||||
from calibre.library.server.utils import format_tag_string, Offsets
|
||||
from calibre import guess_type
|
||||
from calibre import guess_type, prepare_string_for_xml as xml
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.utils.ordered_dict import OrderedDict
|
||||
|
||||
@ -150,13 +150,13 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
|
||||
extra.append(_('RATING: %s<br />')%rating)
|
||||
tags = item[FM['tags']]
|
||||
if tags:
|
||||
extra.append(_('TAGS: %s<br />')%format_tag_string(tags, ',',
|
||||
extra.append(_('TAGS: %s<br />')%xml(format_tag_string(tags, ',',
|
||||
ignore_max=True,
|
||||
no_tag_count=True))
|
||||
no_tag_count=True)))
|
||||
series = item[FM['series']]
|
||||
if series:
|
||||
extra.append(_('SERIES: %s [%s]<br />')%\
|
||||
(series,
|
||||
(xml(series),
|
||||
fmt_sidx(float(item[FM['series_index']]))))
|
||||
for key in CKEYS:
|
||||
mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True)
|
||||
@ -164,11 +164,11 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
|
||||
if val:
|
||||
datatype = CFM[key]['datatype']
|
||||
if datatype == 'text' and CFM[key]['is_multiple']:
|
||||
extra.append('%s: %s<br />'%(name, format_tag_string(val, ',',
|
||||
extra.append('%s: %s<br />'%(xml(name), xml(format_tag_string(val, ',',
|
||||
ignore_max=True,
|
||||
no_tag_count=True)))
|
||||
no_tag_count=True))))
|
||||
else:
|
||||
extra.append('%s: %s<br />'%(name, val))
|
||||
extra.append('%s: %s<br />'%(xml(name), xml(unicode(val))))
|
||||
comments = item[FM['comments']]
|
||||
if comments:
|
||||
comments = comments_to_html(comments)
|
||||
|
@ -541,7 +541,9 @@ Use the options to remove headers and footers to mitigate this issue. If the hea
|
||||
removed from the text it can throw off the paragraph unwrapping.
|
||||
|
||||
Some limitations of PDF input are that complex, multi-column, and image-based documents are not supported.
|
||||
Extraction of vector images and tables from within the document is also not supported.
|
||||
Extraction of vector images and tables from within the document is also not supported. Some PDFs use special glyphs to
|
||||
represent double ll or double ff or fi, etc. Conversion of these may or may not work depending on just how they are
|
||||
represented internally in the PDF.
|
||||
|
||||
Comic Book Collections
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -548,6 +548,7 @@ class BasicNewsRecipe(Recipe):
|
||||
}
|
||||
|
||||
For an example, see the recipe for downloading `The Atlantic`.
|
||||
In addition, you can add 'author' for the author of the article.
|
||||
'''
|
||||
raise NotImplementedError
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user