mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit 078925ed7a: Merge from trunk
BIN  resources/images/news/ajc.png (new file)
Binary file not shown. Size: 1.0 KiB
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.adventuregamers.com
 '''
@@ -11,13 +9,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AdventureGamers(BasicNewsRecipe):
     title                 = u'Adventure Gamers'
     language              = 'en'
-
     __author__            = 'Darko Miletic'
     description           = 'Adventure games portal'
     publisher             = 'Adventure Gamers'
     category              = 'news, games, adventure, technology'
-    language              = 'en'
-
     oldest_article        = 10
     delay                 = 10
     max_articles_per_feed = 100
@@ -26,14 +21,25 @@ class AdventureGamers(BasicNewsRecipe):
     remove_javascript     = True
     use_embedded_content  = False
     INDEX                 = u'http://www.adventuregamers.com'
+    extra_css             = """
+        .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
+        .pageheader_title{font-size: xx-large; color: #394128}
+        .pageheader_byline{font-size: small; font-weight: bold; color: #394128}
+        .score_bg {display: inline; width: 100%; margin-bottom: 2em}
+        .score_column_1{ padding-left: 10px; font-size: small; width: 50%}
+        .score_column_2{ padding-left: 10px; font-size: small; width: 50%}
+        .score_column_3{ padding-left: 10px; font-size: small; width: 50%}
+        .score_header{font-size: large; color: #50544A}
+        .bodytext{display: block}
+        body{font-family: Helvetica,Arial,sans-serif}
+        """

-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }

     keep_only_tags = [
                        dict(name='div', attrs={'class':'content_middle'})
@@ -45,6 +51,7 @@ class AdventureGamers(BasicNewsRecipe):
                      ]

     remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]
+    remove_attributes = ['width','height']

     feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]

@@ -66,12 +73,12 @@ class AdventureGamers(BasicNewsRecipe):

     def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
-        soup.head.insert(0,mtag)
         for item in soup.findAll(style=True):
            del item['style']
+        for item in soup.findAll('div', attrs={'class':'floatright'}):
+            item.extract()
         self.append_page(soup, soup.body, 3)
         pager = soup.find('div',attrs={'class':'toolbar_fat'})
         if pager:
           pager.extract()
-        return soup
+        return self.adeify_images(soup)
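The main structural change in this recipe is the switch from the per-format html2lrf_options/html2epub_options strings to the single conversion_options dict. A minimal sketch of a recipe using the same pattern (the class name and feed URL here are placeholders, not part of the commit):

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        # Hypothetical recipe illustrating the conversion_options dict used above.
        title       = u'Example'
        description = 'Example portal'
        publisher   = 'Example Publisher'
        category    = 'news, example'
        language    = 'en'

        # One dict consumed by every output format, replacing the per-format
        # html2lrf_options / html2epub_options strings removed in this commit.
        conversion_options = {
            'comment'   : description,
            'tags'      : category,
            'publisher' : publisher,
            'language'  : language,
        }

        feeds = [(u'Articles', u'http://example.com/feed')]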
@@ -10,12 +10,31 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
     oldest_article        = 1
     max_articles_per_feed = 100
     no_stylesheets        = True
-    extra_css             = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
     masthead_url          = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'
+    extra_css             = '''
+                            h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                            h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                            p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                            body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+                            '''

     keep_only_tags = [
-                       dict(name='div', attrs={'id':['cxArticleContent']})
-                      ,dict(attrs={'id':['cxArticleText','cxArticleBodyText']})
+                       dict(name='div', attrs={'class':['cxArticleHeader']})
+                      ,dict(attrs={'id':['cxArticleText']})
                      ]

+    remove_tags = [
+                    dict(name='div' , attrs={'class':'cxArticleList'   })
+                   ,dict(name='div' , attrs={'class':'cxFeedTease'     })
+                   ,dict(name='div' , attrs={'class':'cxElementEnlarge'})
+                   ,dict(name='div' , attrs={'id':'cxArticleTools'     })
+                  ]

     feeds = [
              ('Breaking News', 'http://www.ajc.com/genericList-rss.do?source=61499'),
              # -------------------------------------------------------------------
@@ -23,7 +42,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
              # read by simply removing the pound sign from it. I currently have it
              # set to only get the Cobb area
              # --------------------------------------------------------------------
-             ('Atlanta & Fulton', 'http://www.ajc.com/section-rss.do?source=atlanta'),
+             #('Atlanta & Fulton', 'http://www.ajc.com/section-rss.do?source=atlanta'),
              #('Clayton', 'http://www.ajc.com/section-rss.do?source=clayton'),
              #('DeKalb', 'http://www.ajc.com/section-rss.do?source=dekalb'),
              #('Gwinnett', 'http://www.ajc.com/section-rss.do?source=gwinnett'),
@@ -41,7 +60,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
              # but again
              # You can enable which ever team you like by removing the pound sign
              # ------------------------------------------------------------------------
-             ('Sports News', 'http://www.ajc.com/genericList-rss.do?source=61510'),
+             #('Sports News', 'http://www.ajc.com/genericList-rss.do?source=61510'),
              #('Braves', 'http://www.ajc.com/genericList-rss.do?source=61457'),
              ('Falcons', 'http://www.ajc.com/genericList-rss.do?source=61458'),
              #('Hawks', 'http://www.ajc.com/genericList-rss.do?source=61522'),
@@ -52,11 +71,16 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
              ('Music', 'http://www.accessatlanta.com/section-rss.do?source=music'),
             ]

-    def print_version(self, url):
-        return url.partition('?')[0] +'?printArticle=y'
+    def postprocess_html(self, soup, first):
+        for credit_tag in soup.findAll('span', attrs={'class':['imageCredit rightFloat']}):
+            credit_tag.name ='p'
+
+        return soup
+
+    #def print_version(self, url):
+    #    return url.partition('?')[0] +'?printArticle=y'
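The print_version hook, now commented out in this recipe, rewrote article URLs to the printer-friendly page by dropping the query string and appending a print flag. A small illustration of what that rewrite does (the sample URL is hypothetical):

    def print_version(url):
        # Drop everything from the first '?' onward, then request the print view.
        return url.partition('?')[0] + '?printArticle=y'

    # Hypothetical example:
    #   print_version('http://www.ajc.com/news/some-story-123.html?cxtype=rss')
    #   -> 'http://www.ajc.com/news/some-story-123.html?printArticle=y'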
resources/recipes/brand_eins.recipe (new file, 125 lines)
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>'
+__version__   = '0.95'
+
+''' http://brandeins.de - Wirtschaftsmagazin '''
+import re
+import string
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BrandEins(BasicNewsRecipe):
+
+    title = u'Brand Eins'
+    __author__ = 'Constantin Hofstetter'
+    description = u'Wirtschaftsmagazin'
+    publisher ='brandeins.de'
+    category = 'politics, business, wirtschaft, Germany'
+    use_embedded_content = False
+    lang = 'de-DE'
+    no_stylesheets = True
+    encoding = 'utf-8'
+    language = 'de'
+
+    # 2 is the last full magazine (default)
+    # 1 is the newest (but not full)
+    # 3 is one before 2 etc.
+    which_ausgabe = 2
+
+    keep_only_tags = [dict(name='div', attrs={'id':'theContent'}), dict(name='div', attrs={'id':'sidebar'}), dict(name='div', attrs={'class':'intro'}), dict(name='p', attrs={'class':'bodytext'}), dict(name='div', attrs={'class':'single_image'})]
+
+    '''
+    brandeins.de
+    '''
+
+    def postprocess_html(self, soup,first):
+
+        # Move the image of the sidebar right below the h3
+        first_h3 = soup.find(name='div', attrs={'id':'theContent'}).find('h3')
+        for imgdiv in soup.findAll(name='div', attrs={'class':'single_image'}):
+            if len(first_h3.findNextSiblings('div', {'class':'intro'})) >= 1:
+                # first_h3.parent.insert(2, imgdiv)
+                first_h3.findNextSiblings('div', {'class':'intro'})[0].parent.insert(4, imgdiv)
+            else:
+                first_h3.parent.insert(2, imgdiv)
+
+        # Now, remove the sidebar
+        soup.find(name='div', attrs={'id':'sidebar'}).extract()
+
+        # Remove the rating-image (stars) from the h3
+        for img in first_h3.findAll(name='img'):
+            img.extract()
+
+        # Mark the intro texts as italic
+        for div in soup.findAll(name='div', attrs={'class':'intro'}):
+            for p in div.findAll('p'):
+                content = self.tag_to_string(p)
+                new_p = "<p><i>"+ content +"</i></p>"
+                p.replaceWith(new_p)
+
+        return soup
+
+    def parse_index(self):
+        feeds = []
+
+        archive = "http://www.brandeins.de/archiv.html"
+
+        soup = self.index_to_soup(archive)
+        latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0]
+        pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-self.which_ausgabe]
+        url = pre_latest_issue.get('href', False)
+        # Get the title for the magazin - build it out of the title of the cover - take the issue and year;
+        self.title = "Brand Eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d+)", pre_latest_issue.find('img').get('title', False)).group('date')
+        url = 'http://brandeins.de/'+url
+
+        # url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
+        titles_and_articles = self.brand_eins_parse_latest_issue(url)
+        if titles_and_articles:
+            for title, articles in titles_and_articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def brand_eins_parse_latest_issue(self, url):
+        soup = self.index_to_soup(url)
+        article_lists = [soup.find('div', attrs={'class':'subColumnLeft articleList'}), soup.find('div', attrs={'class':'subColumnRight articleList'})]
+
+        titles_and_articles = []
+        current_articles = []
+        chapter_title = "Editorial"
+        self.log('Found Chapter:', chapter_title)
+
+        # Remove last list of links (thats just the impressum and the 'gewinnspiel')
+        article_lists[1].findAll('ul')[len(article_lists[1].findAll('ul'))-1].extract()
+
+        for article_list in article_lists:
+            for chapter in article_list.findAll('ul'):
+                if len(chapter.findPreviousSiblings('h3')) >= 1:
+                    new_chapter_title = string.capwords(self.tag_to_string(chapter.findPreviousSiblings('h3')[0]))
+                    if new_chapter_title != chapter_title:
+                        titles_and_articles.append([chapter_title, current_articles])
+                        current_articles = []
+                        self.log('Found Chapter:', new_chapter_title)
+                    chapter_title = new_chapter_title
+                for li in chapter.findAll('li'):
+                    a = li.find('a', href = True)
+                    if a is None:
+                        continue
+                    title = self.tag_to_string(a)
+                    url = a.get('href', False)
+                    if not url or not title:
+                        continue
+                    url = 'http://brandeins.de/'+url
+                    if len(a.parent.findNextSiblings('p')) >= 1:
+                        description = self.tag_to_string(a.parent.findNextSiblings('p')[0])
+                    else:
+                        description = ''
+
+                    self.log('\t\tFound article:', title)
+                    self.log('\t\t\t', url)
+                    self.log('\t\t\t', description)
+
+                    current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
+        titles_and_articles.append([chapter_title, current_articles])
+        return titles_and_articles
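parse_index in this new recipe returns the usual BasicNewsRecipe structure: a list of (section title, article list) pairs, where each article is a dict with title, url, description and date keys. A rough sketch of the shape it builds; the section names and URLs below are placeholders, not real brand eins data:

    # Hypothetical illustration of the value returned by BrandEins.parse_index().
    feeds = [
        ('Editorial', [
            {'title': 'Some article', 'url': 'http://brandeins.de/example.html',
             'description': 'Teaser text', 'date': ''},
        ]),
        ('Schwerpunkt', [
            # ... more article dicts ...
        ]),
    ]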
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
 __license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 calibre recipe for slate.com
 '''
@@ -10,13 +11,12 @@ import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Comment, Tag

-class PeriodicalNameHere(BasicNewsRecipe):
+class Slate(BasicNewsRecipe):
     # Method variables for customizing downloads
-    title = 'Slate'
     description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
-    __author__ = 'GRiker and Sujata Raman'
-    max_articles_per_feed = 20
-    oldest_article = 7.0
+    __author__ = 'GRiker, Sujata Raman and Nick Redding'
+    max_articles_per_feed = 100
+    oldest_article = 14
     recursions = 0
     delay = 0
     simultaneous_downloads = 5
@@ -27,6 +27,12 @@ class PeriodicalNameHere(BasicNewsRecipe):
     encoding = None
     language = 'en'

+    slate_complete = True
+    if slate_complete:
+        title = 'Slate (complete)'
+    else:
+        title = 'Slate (weekly)'
+
     # Method variables for customizing feed parsing
     summary_length = 250
     use_embedded_content = None
@@ -42,26 +48,15 @@ class PeriodicalNameHere(BasicNewsRecipe):
     match_regexps = []

     # The second entry is for 'Big Money', which comes from a different site, uses different markup
-    keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body', 'story']}),
+    keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body']}),
                       dict(attrs={ 'id':['content']}) ]

     # The second entry is for 'Big Money', which comes from a different site, uses different markup
-    remove_tags = [dict(attrs={ 'id':[
-                                       'add_comments_button',
-                                       'article_bottom_tools',
-                                       'article_bottom_tools_cntr',
-                                       'bizbox_links_bottom',
-                                       'BOXXLE',
-                                       'comments_button',
-                                       'comments-to-fray',
-                                       'fbog_article_bottom_cntr',
-                                       'fray_article_discussion', 'fray_article_links','bottom_sponsored_links','author_bio',
-                                       'insider_ad_wrapper',
-                                       'js_kit_cntr',
-                                       'recommend_tab',
-                                       'ris_links_wrapper',
-                                       'toolbox',
-                                       ]}),
+    remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper',
+                                      'article_bottom_tools_cntr','fray_article_discussion','fray_article_links','bottom_sponsored_links','author_bio',
+                                      'bizbox_links_bottom','ris_links_wrapper','BOXXLE',
+                                      'comments_button','add_comments_button','comments-to-fray','marriott_ad',
+                                      'article_bottom_tools','recommend_tab2','fbog_article_bottom_cntr']}),
                    dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ]

     excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast']
@@ -72,16 +67,15 @@ class PeriodicalNameHere(BasicNewsRecipe):
     extra_css = '''
                   .h1_subhead{font-family:Arial; font-size:small; }
                   h1{font-family:Verdana; font-size:large; }
-                  .byline {font-family:Georgia; margin-bottom: 0px; color: #660033;}
-                  .dateline {font-family:Arial; font-size: smaller; height: 0pt; color:#666666;}
+                  .byline {font-family:Georgia; margin-bottom: 0px; }
+                  .dateline {font-family:Arial; font-size: smaller; height: 0pt;}
                   .imagewrapper {font-family:Verdana;font-size:x-small; }
                   .source {font-family:Verdana; font-size:x-small;}
                   .credit {font-family:Verdana; font-size: smaller;}
                   #article_body {font-family:Verdana; }
                   #content {font-family:Arial; }
                   .caption{font-family:Verdana;font-style:italic; font-size:x-small;}
-                  h3{font-family:Arial; color:#666666; font-size:small}
-                  a{color:#0066CC;}
+                  h3{font-family:Arial; font-size:small}
                   '''

     # Local variables to extend class
@@ -99,32 +93,59 @@ class PeriodicalNameHere(BasicNewsRecipe):
             if isinstance(item, (NavigableString, CData)):
                 strings.append(item.string)
             elif isinstance(item, Tag):
-                res = self.tag_to_string(item)
+                res = self.tag_to_string(item,use_alt=False)
                 if res:
                     strings.append(res)
         return strings

-    def extract_sections(self):
+    def extract_named_sections(self):
         soup = self.index_to_soup( self.baseURL )
-        soup_top_stories = soup.find(True, attrs={'class':'tap2_topic entry-content'})
+        soup_nav_bar = soup.find(True, attrs={'id':'nav'})
+        briefing_nav = soup.find('li')
+        briefing_url = briefing_nav.a['href']
+        for section_nav in soup_nav_bar.findAll('li'):
+            section_name = self.tag_to_string(section_nav,use_alt=False)
+            self.section_dates.append(section_name)
+
+        soup = self.index_to_soup(briefing_url)
+
+        self.log("Briefing url = %s " % briefing_url)
+        section_lists = soup.findAll('ul','view_links_list')
+
+        sections = []
+        for section in section_lists :
+            sections.append(section)
+        return sections
+
+    def extract_dated_sections(self):
+        soup = self.index_to_soup( self.baseURL )
+        soup_top_stories = soup.find(True, attrs={'id':'tap3_cntr'})
+        if soup_top_stories:
+            self.section_dates.append("Top Stories")
+            self.log("SELECTION TOP STORIES %s" % "Top Stories")
+
         soup = soup.find(True, attrs={'id':'toc_links_container'})

         todays_section = soup.find(True, attrs={'class':'todaydateline'})
         self.section_dates.append(self.tag_to_string(todays_section,use_alt=False))
+        self.log("SELECTION DATE %s" % self.tag_to_string(todays_section,use_alt=False))

         older_section_dates = soup.findAll(True, attrs={'class':'maindateline'})
         for older_section in older_section_dates :
             self.section_dates.append(self.tag_to_string(older_section,use_alt=False))
+            self.log("SELECTION DATE %s" % self.tag_to_string(older_section,use_alt=False))

         if soup_top_stories:
-            headline_stories = soup_top_stories.find('ul')
+            headline_stories = soup_top_stories
+            self.log("HAVE top_stories")
         else:
             headline_stories = None
+            self.log("NO top_stories")
         section_lists = soup.findAll('ul')
         # Prepend the headlines to the first section
         if headline_stories:
-            section_lists[0].insert(0,headline_stories)
+            section_lists.insert(0,headline_stories)

         sections = []
         for section in section_lists :
@@ -134,8 +155,7 @@ class PeriodicalNameHere(BasicNewsRecipe):

     def extract_section_articles(self, sections_html) :
         # Find the containers with section content
-        soup = self.index_to_soup(str(sections_html))
-        sections = soup.findAll('ul')
+        sections = sections_html

         articles = {}
         key = None
@@ -145,10 +165,25 @@ class PeriodicalNameHere(BasicNewsRecipe):

             # Get the section name
             if section.has_key('id') :
+                self.log("PROCESSING SECTION id = %s" % section['id'])
                 key = self.section_dates[i]
+                if key.startswith("Pod"):
+                    continue
+                if key.startswith("Blog"):
+                    continue
+                articles[key] = []
+                ans.append(key)
+            elif self.slate_complete:
+                key = self.section_dates[i]
+                if key.startswith("Pod"):
+                    continue
+                if key.startswith("Blog"):
+                    continue
+                self.log("PROCESSING SECTION name = %s" % key)
                 articles[key] = []
                 ans.append(key)
             else :
+                self.log("SECTION %d HAS NO id" % i);
                 continue

             # Get the section article_list
@@ -159,8 +194,10 @@ class PeriodicalNameHere(BasicNewsRecipe):
                 bylines = self.tag_to_strings(article)
                 url = article.a['href']
                 title = bylines[0]
-                full_title = self.tag_to_string(article)
+                full_title = self.tag_to_string(article,use_alt=False)
+                #self.log("ARTICLE TITLE%s" % title)
+                #self.log("ARTICLE FULL_TITLE%s" % full_title)
+                #self.log("URL %s" % url)
                 author = None
                 description = None
                 pubdate = None
@@ -191,7 +228,7 @@ class PeriodicalNameHere(BasicNewsRecipe):
                     excluded = re.compile('|'.join(self.excludedDescriptionKeywords))
                     found_excluded = excluded.search(description)
                     if found_excluded :
-                        if self.verbose : self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
+                        self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
                         continue

                 # Skip articles whose title contain excluded keywords
@@ -200,7 +237,7 @@ class PeriodicalNameHere(BasicNewsRecipe):
                     #self.log("evaluating full_title: %s" % full_title)
                     found_excluded = excluded.search(full_title)
                     if found_excluded :
-                        if self.verbose : self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
+                        self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
                         continue

                 # Skip articles whose author contain excluded keywords
@@ -208,7 +245,7 @@ class PeriodicalNameHere(BasicNewsRecipe):
                     excluded = re.compile('|'.join(self.excludedAuthorKeywords))
                     found_excluded = excluded.search(author)
                     if found_excluded :
-                        if self.verbose : self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
+                        self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
                         continue

                 skip_this_article = False
@@ -216,6 +253,7 @@ class PeriodicalNameHere(BasicNewsRecipe):
                 for article in articles[key] :
                     if article['url'] == url :
                         skip_this_article = True
+                        self.log("SKIPPING DUP %s" % url)
                         break

                 if skip_this_article :
@@ -227,6 +265,8 @@ class PeriodicalNameHere(BasicNewsRecipe):
                     articles[feed] = []
                 articles[feed].append(dict(title=title, url=url, date=pubdate, description=description,
                                            author=author, content=''))
+                #self.log("KEY %s" % feed)
+                #self.log("APPENDED %s" % url)
                 # Promote 'newspapers' to top
                 for (i,article) in enumerate(articles[feed]) :
                     if article['description'] is not None :
@@ -235,32 +275,6 @@ class PeriodicalNameHere(BasicNewsRecipe):


         ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        ans = self.remove_duplicates(ans)
-        return ans
-
-    def flatten_document(self, ans):
-        flat_articles = []
-        for (i,section) in enumerate(ans) :
-            #self.log("flattening section %s: " % section[0])
-            for article in section[1] :
-                #self.log("moving %s to flat_articles[]" % article['title'])
-                flat_articles.append(article)
-        flat_section = ['All Articles', flat_articles]
-        flat_ans = [flat_section]
-        return flat_ans
-
-    def remove_duplicates(self, ans):
-        # Return a stripped ans
-        for (i,section) in enumerate(ans) :
-            #self.log("section %s: " % section[0])
-            for article in section[1] :
-                #self.log("\t%s" % article['title'])
-                #self.log("\looking for %s" % article['url'])
-                for (j,subsequent_section) in enumerate(ans[i+1:]) :
-                    for (k,subsequent_article) in enumerate(subsequent_section[1]) :
-                        if article['url'] == subsequent_article['url'] :
-                            #self.log( "removing %s (%s) from %s" % (subsequent_article['title'], subsequent_article['url'], subsequent_section[0]) )
-                            del subsequent_section[1][k]
         return ans

     def print_version(self, url) :
@@ -268,13 +282,22 @@ class PeriodicalNameHere(BasicNewsRecipe):

     # Class methods
     def parse_index(self) :
-        sections = self.extract_sections()
+        if self.slate_complete:
+            sections = self.extract_named_sections()
+        else:
+            sections = self.extract_dated_sections()
         section_list = self.extract_section_articles(sections)
-        section_list = self.flatten_document(section_list)
         return section_list

-    def get_browser(self) :
-        return BasicNewsRecipe.get_browser()
+    def get_masthead_url(self):
+        masthead = 'http://img.slate.com/images/redesign2008/slate_logo.gif'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(masthead)
+        except:
+            self.log("\nMasthead unavailable")
+            masthead = None
+        return masthead

     def stripAnchors(self,soup):
         body = soup.find('div',attrs={'id':['article_body','content']})
@@ -304,8 +327,8 @@ class PeriodicalNameHere(BasicNewsRecipe):
             excluded = re.compile('|'.join(self.excludedContentKeywords))
             found_excluded = excluded.search(str(soup))
             if found_excluded :
-                print "no allowed content found, removing article"
-                raise Exception('String error')
+                print "No allowed content found, removing article"
+                raise Exception('Rejected article')

         # Articles from www.thebigmoney.com use different tagging for byline, dateline and body
         head = soup.find('head')
@@ -338,7 +361,6 @@ class PeriodicalNameHere(BasicNewsRecipe):
         dept_kicker = soup.find('div', attrs={'class':'department_kicker'})
         if dept_kicker is not None :
             kicker_strings = self.tag_to_strings(dept_kicker)
-            #kicker = kicker_strings[2] + kicker_strings[3]
             kicker = ''.join(kicker_strings[2:])
             kicker = re.sub('\.','',kicker)
             h3Tag = Tag(soup, "h3")
@@ -346,23 +368,9 @@ class PeriodicalNameHere(BasicNewsRecipe):
             emTag.insert(0,NavigableString(kicker))
             h3Tag.insert(0, emTag)
             dept_kicker.replaceWith(h3Tag)
-
-        # Change <h1> to <h2>
-        headline = soup.find("h1")
-        #tag = headline.find("span")
-        #tag.name = 'div'
-
-        if headline is not None :
-            h2tag = Tag(soup, "h2")
-            h2tag['class'] = "headline"
-            strs = self.tag_to_strings(headline)
-            result = ''
-            for (i,substr) in enumerate(strs) :
-                result += substr
-                if i < len(strs) -1 :
-                    result += '<br />'
-            #h2tag.insert(0, result)
-            #headline.replaceWith(h2tag)
+        else:
+            self.log("No kicker--return null")
+            return None

         # Fix up the concatenated byline and dateline
         byline = soup.find(True,attrs={'class':'byline'})
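extract_section_articles builds a dict keyed by the section names collected in self.section_dates and finally converts it into the (section, articles) pairs that parse_index must return. A rough sketch of the intermediate data; the section names, dates and URL below are placeholders, not taken from the commit:

    # Hypothetical illustration of the structures built in extract_section_articles().
    section_dates = ['Top Stories', 'Friday, Aug. 20']          # one entry per section or date
    articles = {
        'Top Stories': [
            {'title': 'Example story', 'url': 'http://www.slate.com/id/0000000/',
             'date': None, 'description': 'Teaser', 'author': 'Someone', 'content': ''},
        ],
    }
    ans = ['Top Stories']                                        # ordered section keys
    ans = [(key, articles[key]) for key in ans if articles.has_key(key)]  # returned to parse_index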
@@ -5,15 +5,16 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os
+import os, time
 import sqlite3 as sqlite

 from calibre.devices.usbms.books import BookList
 from calibre.devices.kobo.books import Book
 from calibre.devices.kobo.books import ImageWrapper
 from calibre.devices.mime import mime_type_ext
-from calibre.devices.usbms.driver import USBMS
+from calibre.devices.usbms.driver import USBMS, debug_print
 from calibre import prints
+from calibre.devices.usbms.books import CollectionsBookList

 class KOBO(USBMS):

@@ -21,12 +22,15 @@ class KOBO(USBMS):
     gui_name = 'Kobo Reader'
     description = _('Communicate with the Kobo Reader')
     author = 'Timothy Legge and Kovid Goyal'
-    version = (1, 0, 4)
+    version = (1, 0, 6)

     supported_platforms = ['windows', 'osx', 'linux']

+    booklist_class = CollectionsBookList
+
     # Ordered list of supported formats
     FORMATS     = ['epub', 'pdf']
+    CAN_SET_METADATA = True

     VENDOR_ID   = [0x2237]
     PRODUCT_ID  = [0x4161]
@@ -40,6 +44,12 @@ class KOBO(USBMS):

     VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo'])

+    EXTRA_CUSTOMIZATION_MESSAGE = _('The Kobo supports only one collection '
+            'currently: the \"Im_Reading\" list. Create a tag called \"Im_Reading\" ')+\
+                    'for automatic management'
+
+    EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(['tags'])
+
     def initialize(self):
         USBMS.initialize(self)
         self.book_class = Book
@@ -63,6 +73,8 @@ class KOBO(USBMS):
                 self._card_b_prefix if oncard == 'cardb' \
                                                 else self._main_prefix

+        self.booklist_class.rebuild_collections = self.rebuild_collections
+
         # get the metadata cache
         bl = self.booklist_class(oncard, prefix, self.settings)
         need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
@@ -85,9 +97,7 @@ class KOBO(USBMS):
                 playlist_map = {}

                 if readstatus == 1:
-                    if lpath not in playlist_map:
-                        playlist_map[lpath] = []
-                    playlist_map[lpath].append("I\'m Reading")
+                    playlist_map[lpath]= "Im_Reading"

                 path = self.normalize_path(path)
                 # print "Normalized FileName: " + path
@@ -104,14 +114,17 @@ class KOBO(USBMS):
                     if self.update_metadata_item(bl[idx]):
                         # print 'update_metadata_item returned true'
                         changed = True
-                    bl[idx].device_collections = playlist_map.get(lpath, [])
+                    if lpath in playlist_map and \
+                        playlist_map[lpath] not in bl[idx].device_collections:
+                            bl[idx].device_collections.append(playlist_map[lpath])
                 else:
                     if ContentType == '6':
                         book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576)
                     else:
                         book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
                     # print 'Update booklist'
-                    book.device_collections = playlist_map.get(book.lpath, [])
+                    book.device_collections = [playlist_map[lpath]] if lpath in playlist_map else []

                     if bl.add_book(book, replace_metadata=False):
                         changed = True
             except: # Probably a path encoding error
@@ -398,3 +411,95 @@ class KOBO(USBMS):
         size = os.stat(cls.normalize_path(os.path.join(prefix, lpath))).st_size
         book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=size, other=mi)
         return book
+
+    def get_device_paths(self):
+        paths, prefixes = {}, {}
+        for prefix, path, source_id in [
+                ('main', 'metadata.calibre', 0),
+                ('card_a', 'metadata.calibre', 1),
+                ('card_b', 'metadata.calibre', 2)
+                ]:
+            prefix = getattr(self, '_%s_prefix'%prefix)
+            if prefix is not None and os.path.exists(prefix):
+                paths[source_id] = os.path.join(prefix, *(path.split('/')))
+        return paths
+
+    def update_device_database_collections(self, booklists, collections_attributes):
+        # debug_print('Starting update_device_database_collections', collections_attributes)
+
+        # Force collections_attributes to be 'tags' as no other is currently supported
+        # debug_print('KOBO: overriding the provided collections_attributes:', collections_attributes)
+        collections_attributes = ['tags']
+
+        collections = booklists.get_collections(collections_attributes)
+        # debug_print('Collections', collections)
+        for category, books in collections.items():
+            if category == 'Im_Reading':
+                # Create a connection to the sqlite database
+                connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
+                cursor = connection.cursor()
+
+                # Reset Im_Reading list in the database
+                query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null'
+                try:
+                    cursor.execute (query)
+                except:
+                    debug_print('Database Exception: Unable to reset Im_Reading list')
+                    raise
+                else:
+                    # debug_print('Commit: Reset Im_Reading list')
+                    connection.commit()
+
+                for book in books:
+                    # debug_print('Title:', book.title, 'lpath:', book.path)
+                    book.device_collections = ['Im_Reading']
+
+                    extension = os.path.splitext(book.path)[1]
+                    ContentType = self.get_content_type_from_extension(extension)
+
+                    ContentID = self.contentid_from_path(book.path, ContentType)
+                    datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
+
+                    t = (datelastread,ContentID,)
+
+                    try:
+                        cursor.execute('update content set ReadStatus=1,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t)
+                    except:
+                        debug_print('Database Exception: Unable create Im_Reading list')
+                        raise
+                    else:
+                        connection.commit()
+                        # debug_print('Database: Commit create Im_Reading list')
+
+                cursor.close()
+                connection.close()
+
+        # debug_print('Finished update_device_database_collections', collections_attributes)
+
+    def sync_booklists(self, booklists, end_session=True):
+        # debug_print('KOBO: started sync_booklists')
+        paths = self.get_device_paths()
+
+        blists = {}
+        for i in paths:
+            if booklists[i] is not None:
+                #debug_print('Booklist: ', i)
+                blists[i] = booklists[i]
+        opts = self.settings()
+        if opts.extra_customization:
+            collections = [x.lower().strip() for x in
+                    opts.extra_customization.split(',')]
+        else:
+            collections = []
+
+        #debug_print('KOBO: collection fields:', collections)
+        for i, blist in blists.items():
+            self.update_device_database_collections(blist, collections)
+
+        USBMS.sync_booklists(self, booklists, end_session=end_session)
+        #debug_print('KOBO: finished sync_booklists')
+
+    def rebuild_collections(self, booklist, oncard):
+        collections_attributes = []
+        self.update_device_database_collections(booklist, collections_attributes)
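The new update_device_database_collections talks directly to the reader's KoboReader.sqlite database: it first clears ReadStatus for all top-level entries, then flags every book in the Im_Reading collection with a parameterized UPDATE. A stripped-down sketch of that sequence; the database path follows the one used above and the list of ContentIDs is a placeholder:

    import sqlite3, time

    def mark_im_reading(db_path, content_ids):
        # db_path is expected to be <device mount>/.kobo/KoboReader.sqlite;
        # content_ids is a placeholder list of Kobo ContentID strings.
        connection = sqlite3.connect(db_path)
        cursor = connection.cursor()

        # Clear the current Im_Reading list (top-level entries have BookID NULL).
        cursor.execute("update content set ReadStatus=0, FirstTimeReading='true' "
                       "where BookID is Null")
        connection.commit()

        now = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime())
        for content_id in content_ids:
            cursor.execute("update content set ReadStatus=1, FirstTimeReading='false', "
                           "DateLastRead=? where BookID is Null and ContentID = ?",
                           (now, content_id))
        connection.commit()

        cursor.close()
        connection.close()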
@@ -137,7 +137,7 @@ def add_pipeline_options(parser, plumber):
              'chapter', 'chapter_mark',
              'prefer_metadata_cover', 'remove_first_image',
              'insert_metadata', 'page_breaks_before',
-             'preprocess_html',
+             'preprocess_html', 'html_unwrap_factor',
             ]
         ),

@@ -362,6 +362,15 @@ OptionRecommendation(name='preprocess_html',
             )
         ),

+OptionRecommendation(name='html_unwrap_factor',
+        recommended_value=0.40, level=OptionRecommendation.LOW,
+        help=_('Scale used to determine the length at which a line should '
+            'be unwrapped if preprocess is enabled. Valid values are a decimal between 0 and 1. The '
+            'default is 0.40, just below the median line length. This will unwrap typical books '
+            ' with hard line breaks, but should be reduced if the line length is variable.'
+            )
+        ),
+
 OptionRecommendation(name='smarten_punctuation',
         recommended_value=False, level=OptionRecommendation.LOW,
         help=_('Convert plain quotes, dashes and ellipsis to their '

@@ -351,7 +351,7 @@ class HTMLPreProcessor(object):
                 # print "The pdf line length returned is " + str(length)
                 end_rules.append(
                     # Un wrap using punctuation
-                    (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
                 )

                 for rule in self.PREPROCESS + start_rules:
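The unwrap rules above rely on a fixed-width look-behind so that a paragraph break is only joined when at least `length` characters precede it and the character just before the break looks like an unfinished sentence. A simplified, self-contained illustration of that trick; the threshold value is a placeholder, whereas in the real code it comes from line_length() scaled by html_unwrap_factor, and the real rule handles more punctuation and intervening tags:

    import re

    # Join a paragraph break only when the preceding line is at least `length`
    # characters long and ends in a lower-case letter or comma.
    length = 40
    unwrap = re.compile(r'(?<=.{%i}[a-z,])\s*</p>\s*<p[^>]*>\s*' % length)

    html = '<p>' + 'x' * 45 + ' this line ends with a comma,</p> <p>so it is joined.</p>'
    print(unwrap.sub(' ', html))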
@@ -11,10 +11,11 @@ from calibre.utils.logging import default_log

 class PreProcessor(object):

-    def __init__(self, log=None):
+    def __init__(self, log=None, extra_opts=None):
         self.log = default_log if log is None else log
         self.html_preprocess_sections = 0
         self.found_indents = 0
+        self.extra_opts = extra_opts

     def chapter_head(self, match):
         chap = match.group('chap')
@@ -91,6 +92,7 @@ class PreProcessor(object):
         # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
         linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
         blankreg = re.compile(r'\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>', re.IGNORECASE)
+        #multi_blank = re.compile(r'(\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>){2,}', re.IGNORECASE)
         blanklines = blankreg.findall(html)
         lines = linereg.findall(html)
         if len(lines) > 1:
@@ -147,15 +149,16 @@ class PreProcessor(object):
             format = 'html'

         # Calculate Length
-        length = line_length(format, html, 0.4)
+        length = line_length('pdf', html, getattr(self.extra_opts,
+            'html_unwrap_factor', 0.4))
         self.log("*** Median line length is " + str(length) + ",calculated with " + format + " format ***")
         #
         # Unwrap and/or delete soft-hyphens, hyphens
         html = re.sub(u'\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
         html = re.sub(u'(?<=[-–—])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html)

-        # Unwrap lines using punctation if the median length of all lines is less than 200
-        unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
+        # Unwrap lines using punctation and line length
+        unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
         html = unwrap.sub(' ', html)

         # If still no sections after unwrapping mark split points on lines with no punctuation
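The preprocessor now reads the unwrap factor from the conversion options object instead of hard-coding 0.4, and getattr with a default keeps old callers (which pass no extra_opts) working. A small sketch of that fallback behaviour; the option container below is a stand-in, not the real conversion options object:

    class FakeOpts(object):
        # Stand-in for the conversion options object passed as extra_opts.
        html_unwrap_factor = 0.25

    # With options present, the user's value is used:
    print(getattr(FakeOpts(), 'html_unwrap_factor', 0.4))   # 0.25
    # With no options (extra_opts=None), the old default survives:
    print(getattr(None, 'html_unwrap_factor', 0.4))          # 0.4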
@@ -12,6 +12,7 @@ from copy import deepcopy
 from lxml import etree

 from calibre.customize.conversion import InputFormatPlugin
+from calibre.ebooks.conversion.utils import PreProcessor
 from calibre import guess_type

 class Canvas(etree.XSLTExtension):
@@ -419,4 +420,9 @@ class LRFInput(InputFormatPlugin):
         styles.write()
         return os.path.abspath('content.opf')

+    def preprocess_html(self, html):
+        preprocessor = PreProcessor(log=getattr(self, 'log', None))
+        return preprocessor(html)
+
@@ -26,8 +26,10 @@ class StructureDetectionWidget(Widget, Ui_Form):
                 'remove_first_image',
                 'insert_metadata', 'page_breaks_before',
                 'preprocess_html', 'remove_header', 'header_regex',
-                'remove_footer', 'footer_regex']
+                'remove_footer', 'footer_regex','html_unwrap_factor']
                 )
+        self.opt_html_unwrap_factor.setEnabled(False)
+        self.huf_label.setEnabled(False)
         self.db, self.book_id = db, book_id
         for x in ('pagebreak', 'rule', 'both', 'none'):
             self.opt_chapter_mark.addItem(x)
@@ -64,3 +66,8 @@ class StructureDetectionWidget(Widget, Ui_Form):
                     _('The XPath expression %s is invalid.')%x.text).exec_()
             return False
         return True
+
+    def set_value_handler(self, g, val):
+        if val is None and g is self.opt_html_unwrap_factor:
+            g.setValue(0.0)
+            return True
@ -14,10 +14,10 @@
|
|||||||
<string>Form</string>
|
<string>Form</string>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="0" column="0" colspan="2">
|
<item row="0" column="1" colspan="2">
|
||||||
<widget class="XPathEdit" name="opt_chapter" native="true"/>
|
<widget class="XPathEdit" name="opt_chapter" native="true"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="0">
|
<item row="1" column="0" colspan="2">
|
||||||
<widget class="QLabel" name="label">
|
<widget class="QLabel" name="label">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Chapter &mark:</string>
|
<string>Chapter &mark:</string>
|
||||||
@ -27,31 +27,31 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="1">
|
<item row="1" column="2">
|
||||||
<widget class="QComboBox" name="opt_chapter_mark">
|
<widget class="QComboBox" name="opt_chapter_mark">
|
||||||
<property name="minimumContentsLength">
|
<property name="minimumContentsLength">
|
||||||
<number>20</number>
|
<number>20</number>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="0">
|
<item row="2" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_remove_first_image">
|
<widget class="QCheckBox" name="opt_remove_first_image">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Remove first &image</string>
|
<string>Remove first &image</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="4" column="0">
|
<item row="5" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_insert_metadata">
|
<widget class="QCheckBox" name="opt_insert_metadata">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Insert &metadata as page at start of book</string>
|
<string>Insert &metadata as page at start of book</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="10" column="0" colspan="2">
|
<item row="11" column="0" colspan="3">
|
||||||
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
|
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="11" column="0" colspan="2">
|
<item row="12" column="0" colspan="3">
|
||||||
<spacer name="verticalSpacer">
|
<spacer name="verticalSpacer">
|
||||||
<property name="orientation">
|
<property name="orientation">
|
||||||
<enum>Qt::Vertical</enum>
|
<enum>Qt::Vertical</enum>
|
||||||
@ -64,27 +64,66 @@
|
|||||||
</property>
|
</property>
|
||||||
</spacer>
|
</spacer>
|
||||||
</item>
|
</item>
|
||||||
<item row="7" column="0">
|
<item row="8" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_remove_footer">
|
<widget class="QCheckBox" name="opt_remove_footer">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Remove F&ooter</string>
|
<string>Remove F&ooter</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0">
|
<item row="6" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_remove_header">
|
<widget class="QCheckBox" name="opt_remove_header">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Remove H&eader</string>
|
<string>Remove H&eader</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="6" column="0" colspan="2">
|
<item row="7" column="0" colspan="3">
|
||||||
<widget class="RegexEdit" name="opt_header_regex" native="true"/>
|
<widget class="RegexEdit" name="opt_header_regex" native="true"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="8" column="0" colspan="2">
|
<item row="9" column="0" colspan="3">
|
||||||
<widget class="RegexEdit" name="opt_footer_regex" native="true"/>
|
<widget class="RegexEdit" name="opt_footer_regex" native="true"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="0">
|
<item row="4" column="1">
|
||||||
|
<widget class="QLabel" name="huf_label">
|
||||||
|
<property name="text">
|
||||||
|
<string>Line &un-wrap factor during preprocess:</string>
|
||||||
|
</property>
|
||||||
|
<property name="buddy">
|
||||||
|
<cstring>opt_html_unwrap_factor</cstring>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="4" column="2">
|
||||||
|
<widget class="QDoubleSpinBox" name="opt_html_unwrap_factor">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string/>
|
||||||
|
</property>
|
||||||
|
<property name="maximum">
|
||||||
|
<double>1.000000000000000</double>
|
||||||
|
</property>
|
||||||
|
<property name="singleStep">
|
||||||
|
<double>0.050000000000000</double>
|
||||||
|
</property>
|
||||||
|
<property name="value">
|
||||||
|
<double>0.400000000000000</double>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="4" column="0">
|
||||||
|
<spacer name="horizontalSpacer">
|
||||||
|
<property name="orientation">
|
||||||
|
<enum>Qt::Horizontal</enum>
|
||||||
|
</property>
|
||||||
|
<property name="sizeHint" stdset="0">
|
||||||
|
<size>
|
||||||
|
<width>40</width>
|
||||||
|
<height>20</height>
|
||||||
|
</size>
|
||||||
|
</property>
|
||||||
|
</spacer>
|
||||||
|
</item>
|
||||||
|
<item row="3" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_preprocess_html">
|
<widget class="QCheckBox" name="opt_preprocess_html">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Preprocess input file to possibly improve structure detection</string>
|
<string>&Preprocess input file to possibly improve structure detection</string>
|
||||||
@ -108,5 +147,38 @@
|
|||||||
</customwidget>
|
</customwidget>
|
||||||
</customwidgets>
|
</customwidgets>
|
||||||
<resources/>
|
<resources/>
|
||||||
<connections/>
|
<connections>
|
||||||
|
<connection>
|
||||||
|
<sender>opt_preprocess_html</sender>
|
||||||
|
<signal>toggled(bool)</signal>
|
||||||
|
<receiver>opt_html_unwrap_factor</receiver>
|
||||||
|
<slot>setEnabled(bool)</slot>
|
||||||
|
<hints>
|
||||||
|
<hint type="sourcelabel">
|
||||||
|
<x>328</x>
|
||||||
|
<y>87</y>
|
||||||
|
</hint>
|
||||||
|
<hint type="destinationlabel">
|
||||||
|
<x>481</x>
|
||||||
|
<y>113</y>
|
||||||
|
</hint>
|
||||||
|
</hints>
|
||||||
|
</connection>
|
||||||
|
<connection>
|
||||||
|
<sender>opt_preprocess_html</sender>
|
||||||
|
<signal>toggled(bool)</signal>
|
||||||
|
<receiver>huf_label</receiver>
|
||||||
|
<slot>setEnabled(bool)</slot>
|
||||||
|
<hints>
|
||||||
|
<hint type="sourcelabel">
|
||||||
|
<x>295</x>
|
||||||
|
<y>88</y>
|
||||||
|
</hint>
|
||||||
|
<hint type="destinationlabel">
|
||||||
|
<x>291</x>
|
||||||
|
<y>105</y>
|
||||||
|
</hint>
|
||||||
|
</hints>
|
||||||
|
</connection>
|
||||||
|
</connections>
|
||||||
</ui>
|
</ui>
|
||||||
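The two <connection> entries added above encode a single behaviour: toggling opt_preprocess_html enables or disables the new un-wrap factor controls. For readers who do not work with Designer files often, the following stand-alone PyQt4 sketch reproduces the same wiring and spin box settings. It is illustrative only and is not part of this commit.

# Illustrative sketch only, not part of this commit: the behaviour encoded by the
# Designer <connections> block above, reproduced with explicit PyQt4 calls.
from PyQt4.Qt import QApplication, QWidget, QGridLayout, QCheckBox, \
    QDoubleSpinBox, QLabel

app = QApplication([])
form = QWidget()
grid = QGridLayout(form)

opt_preprocess_html = QCheckBox('&Preprocess input file to possibly improve structure detection')
huf_label = QLabel('Line &un-wrap factor during preprocess:')
opt_html_unwrap_factor = QDoubleSpinBox()
opt_html_unwrap_factor.setMaximum(1.0)      # <property name="maximum">
opt_html_unwrap_factor.setSingleStep(0.05)  # <property name="singleStep">
opt_html_unwrap_factor.setValue(0.4)        # <property name="value">
huf_label.setBuddy(opt_html_unwrap_factor)

# The two <connection> entries: opt_preprocess_html.toggled(bool) drives setEnabled(bool)
opt_preprocess_html.toggled.connect(opt_html_unwrap_factor.setEnabled)
opt_preprocess_html.toggled.connect(huf_label.setEnabled)
# Start disabled to match the unchecked checkbox
huf_label.setEnabled(False)
opt_html_unwrap_factor.setEnabled(False)

grid.addWidget(opt_preprocess_html, 3, 0, 1, 2)
grid.addWidget(huf_label, 4, 1)
grid.addWidget(opt_html_unwrap_factor, 4, 2)
form.show()
app.exec_()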
@@ -6,10 +6,7 @@ The dialog used to edit meta information for a book as well as
 add/remove formats
 '''

-import os
-import re
-import time
-import traceback
+import os, re, time, traceback, textwrap

 from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QThread, QDate, \
                      QPixmap, QListWidgetItem, QDialog, pyqtSignal
@@ -331,6 +328,14 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         ResizableDialog.__init__(self, window)
         self.bc_box.layout().setAlignment(self.cover, Qt.AlignCenter|Qt.AlignHCenter)
         self.cancel_all = False
+        base = unicode(self.author_sort.toolTip())
+        self.ok_aus_tooltip = '<p>' + textwrap.fill(base+'<br><br>'+
+                _(' The green color indicates that the current '
+                  'author sort matches the current author'))
+        self.bad_aus_tooltip = '<p>'+textwrap.fill(base + '<br><br>'+
+                _(' The red color indicates that the current '
+                  'author sort does not match the current author'))
+
         if cancel_all:
             self.__abort_button = self.button_box.addButton(self.button_box.Abort)
             self.__abort_button.setToolTip(_('Abort the editing of all remaining books'))
@@ -490,6 +495,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
             col = 'rgb(255, 0, 0, 20%)'
         self.author_sort.setStyleSheet('QLineEdit { color: black; '
                 'background-color: %s; }'%col)
+        tt = self.ok_aus_tooltip if normal else self.bad_aus_tooltip
+        self.author_sort.setToolTip(tt)

     def validate_isbn(self, isbn):
         isbn = unicode(isbn).strip()
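The two metadata-dialog hunks above follow one pattern: build both author-sort tooltips once in __init__, then swap the tooltip and background colour whenever the field is re-validated. A minimal stand-alone sketch of that pattern follows; the green colour value and the `normal` flag are assumptions for illustration, not taken from calibre.

# Minimal sketch of the precomputed-tooltip pattern used above. The green colour
# value and the `normal` flag are assumptions for illustration only.
import textwrap
from PyQt4.Qt import QApplication, QLineEdit

app = QApplication([])
author_sort = QLineEdit()

base = unicode(author_sort.toolTip())
ok_tt = '<p>' + textwrap.fill(base + '<br><br> The green color indicates that the '
                              'current author sort matches the current author')
bad_tt = '<p>' + textwrap.fill(base + '<br><br> The red color indicates that the '
                               'current author sort does not match the current author')

def mark_author_sort(normal):
    # Green background and "ok" tooltip when the sort matches, red and "bad" otherwise
    col = 'rgb(0, 255, 0, 20%)' if normal else 'rgb(255, 0, 0, 20%)'
    author_sort.setStyleSheet('QLineEdit { color: black; background-color: %s; }' % col)
    author_sort.setToolTip(ok_tt if normal else bad_tt)

mark_author_sort(True)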
@@ -330,6 +330,17 @@ There are a few more options in this section.
    two covers. This option will simply remove the first image from the source document, thereby
    ensuring that the converted book has only one cover, the one specified in |app|.

+:guilabel:`Preprocess input`
+    This option activates various algorithms that try to detect and correct common cases of
+    badly formatted input documents, such as hard line breaks and large blocks of text with no formatting.
+    Turn it on if your input document suffers from bad formatting, but be aware that in
+    some cases it can lead to worse results, so use it with care.
+
+:guilabel:`Line-unwrap factor`
+    This option controls the algorithm |app| uses to remove hard line breaks. For example, with a value
+    of 0.4, calibre removes hard line breaks from the end of lines whose length is less than
+    the length of 40% of all lines in the document.
+
 Table of Contents
 ------------------

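To make the :guilabel:`Line-unwrap factor` description concrete, here is a toy Python sketch of how such a factor can drive unwrapping: the factor picks a length threshold from the distribution of line lengths, and hard breaks are then kept or removed by comparing each line against that threshold. This is an illustration only, not calibre's preprocessing code, and the exact comparison rule calibre applies may differ.

# Toy sketch only, not calibre's implementation. The unwrap factor (e.g. 0.4)
# selects a line-length threshold from the document's line-length distribution;
# lines at or above the threshold are assumed to have been wrapped mid-paragraph
# and are joined to the following line, while shorter lines keep their hard break.
def unwrap_hard_breaks(text, factor=0.4):
    lines = text.splitlines()
    lengths = sorted(len(l) for l in lines if l.strip())
    if not lengths:
        return text
    # The length that `factor` of the non-empty lines do not exceed
    threshold = lengths[min(int(factor * len(lengths)), len(lengths) - 1)]
    paragraphs, current = [], []
    for line in lines:
        stripped = line.strip()
        if not stripped:                      # blank line always ends a paragraph
            if current:
                paragraphs.append(' '.join(current))
                current = []
        else:
            current.append(stripped)
            if len(stripped) < threshold:     # short line: keep its hard break
                paragraphs.append(' '.join(current))
                current = []
    if current:
        paragraphs.append(' '.join(current))
    return '\n\n'.join(paragraphs)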
|