KG updates

This commit is contained in:
GRiker 2011-01-18 03:18:40 -07:00
commit 0c20dd246d
22 changed files with 545 additions and 334 deletions

View File

@ -1,6 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
arstechnica.com
'''
@ -9,19 +8,26 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class ArsTechnica2(BasicNewsRecipe):
class ArsTechnica(BasicNewsRecipe):
title = u'Ars Technica'
language = 'en'
__author__ = 'Darko Miletic and Sujata Raman'
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
description = 'The art of technology'
publisher = 'Ars Technica'
category = 'news, IT, technology'
oldest_article = 2
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
extra_css = ' body {font-family: Arial,Helvetica,sans-serif} .title{text-align: left} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
extra_css = '''
body {font-family: Arial,Helvetica,sans-serif}
.title{text-align: left}
.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
.news-item-figure-caption-text{font-size:small; font-style:italic}
.news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
'''
ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories
conversion_options = {
'comments' : description
@ -31,10 +37,10 @@ class ArsTechnica2(BasicNewsRecipe):
}
preprocess_regexps = [
(re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
]
#preprocess_regexps = [
# (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
# ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
# ]
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
@ -42,7 +48,7 @@ class ArsTechnica2(BasicNewsRecipe):
dict(name=['object','link','embed'])
,dict(name='div', attrs={'class':'read-more-link'})
]
remove_attributes=['width','height']
#remove_attributes=['width','height']
feeds = [
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
@ -56,6 +62,7 @@ class ArsTechnica2(BasicNewsRecipe):
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
]
# This deals with multi-page stories
def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'class':'pager'})
if pager:
@ -81,6 +88,7 @@ class ArsTechnica2(BasicNewsRecipe):
def preprocess_html(self, soup):
# Adds line breaks near the byline (not sure why this is needed)
ftag = soup.find('div', attrs={'class':'byline'})
if ftag:
brtag = Tag(soup,'br')
@ -88,12 +96,33 @@ class ArsTechnica2(BasicNewsRecipe):
ftag.insert(4,brtag)
ftag.insert(5,brtag2)
# Remove style items
for item in soup.findAll(style=True):
del item['style']
# Remove id
for item in soup.findAll(id=True):
del item['id']
# For some reason, links to authors don't have the domainname
a_author = soup.find('a',{'href':re.compile("^/author")})
if a_author:
a_author['href'] = 'http://arstechnica.com'+a_author['href']
# within div class news-item-figure, we need to grab images
# Deal with multi-page stories
self.append_page(soup, soup.body, 3)
return soup
def get_article_url(self, article):
# If the article title starts with Etc:, don't return it
if self.ignoreEtcArticles:
article_title = article.get('title',None)
if re.match('Etc: ',article_title) is not None:
return None
# The actual article is in a guid tag
return article.get('guid', None).rpartition('?')[0]

View File

@ -28,7 +28,7 @@ class DilbertBig(BasicNewsRecipe):
,'publisher' : publisher
}
feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip' )]
def get_article_url(self, article):
return article.get('feedburner_origlink', None)

View File

@ -586,105 +586,125 @@ class NYTimes(BasicNewsRecipe):
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):
try:
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
except:
self.log("ERROR: One picture per article in postprocess_html")
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
try:
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
except:
self.log("ERROR: Problem in change captions to italic")
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and caption.contents[0]:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
try:
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
else:
# Blog entry - replace headline, remove <hr> tags
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
else:
# Blog entry - replace headline, remove <hr> tags
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
try:
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
try:
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
except:
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
try:
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
except:
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
return soup
return soup
def populate_article_metadata(self, article, soup, first):
shortparagraph = ""

View File

@ -586,105 +586,125 @@ class NYTimes(BasicNewsRecipe):
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):
try:
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
except:
self.log("ERROR: One picture per article in postprocess_html")
try:
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
except:
self.log("ERROR: Problem in change captions to italic")
try:
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
else:
# Blog entry - replace headline, remove <hr> tags
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and caption.contents[0]:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
else:
# Blog entry - replace headline, remove <hr> tags
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
return soup
try:
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
except:
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
try:
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
except:
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
return soup
def populate_article_metadata(self, article, soup, first):
shortparagraph = ""
try:

View File

@ -21,16 +21,53 @@ class SeattleTimes(BasicNewsRecipe):
encoding = 'cp1252'
language = 'en'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')]
feeds = [
(u'Top Stories',
u'http://seattletimes.nwsource.com/rss/home.xml'),
#(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')
(u'Business & Technology',
u'http://seattletimes.nwsource.com/rss/businesstechnology.xml'),
(u'Personal Technology',
u'http://seattletimes.nwsource.com/rss/personaltechnology.xml'),
(u'Entertainment & the Arts',
u'http://seattletimes.nwsource.com/rss/artsentertainment.xml'),
(u'Health',
u'http://seattletimes.nwsource.com/rss/health.xml'),
(u'Living',
u'http://seattletimes.nwsource.com/rss/living.xml'),
(u'Local News',
u'http://seattletimes.nwsource.com/rss/localnews.xml'),
(u'Nation & World',
u'http://seattletimes.nwsource.com/rss/nationworld.xml'),
(u'Opinion',
u'http://seattletimes.nwsource.com/rss/opinion.xml'),
(u'Politics',
u'http://seattletimes.nwsource.com/rss/politics.xml'),
(u'Sports',
u'http://seattletimes.nwsource.com/rss/sports.xml'),
(u'Nicole Brodeur',
u'http://seattletimes.nwsource.com/rss/nicolebrodeur.xml'),
(u'Danny Westneat',
u'http://seattletimes.nwsource.com/rss/dannywestneat.xml'),
(u'Jerry Large',
u'http://seattletimes.nwsource.com/rss/jerrylarge.xml'),
(u'Ron Judd',
u'http://seattletimes.nwsource.com/rss/ronjudd.xml'),
(u'Education',
u'http://seattletimes.nwsource.com/rss/education.xml'),
(u'Letters to the Editor',
u'http://seattletimes.nwsource.com/rss/northwestvoices.xml'),
(u'Travel',
u'http://seattletimes.nwsource.com/rss/travel.xml'),
(u'Outdoors',
u'http://seattletimes.nwsource.com/rss/outdoors.xml'),
(u'Steve Kelley',
u'http://seattletimes.nwsource.com/rss/stevekelley.xml'),
(u'Jerry Brewer',
u'http://seattletimes.nwsource.com/rss/jerrybrewer.xml'),
(u'Most Read Articles',
u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
]
remove_tags = [
dict(name=['object','link','script'])

View File

@ -178,7 +178,7 @@ class INVESBOOK(EB600):
class BOOQ(EB600):
name = 'Booq Device Interface'
gui_name = 'Booq'
gui_name = 'bq Reader'
FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'pdf', 'doc', 'rtf', 'txt', 'html']

View File

@ -33,8 +33,8 @@ class PALMPRE(USBMS):
class AVANT(USBMS):
name = 'Booq Avant Device Interface'
gui_name = 'Avant'
description = _('Communicate with the Booq Avant')
gui_name = 'bq Avant'
description = _('Communicate with the Bq Avant')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']

View File

@ -360,7 +360,7 @@ class HTMLPreProcessor(object):
(re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
# Center separator lines
(re.compile(u'<br>\s*(?P<break>([*#•✦]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
(re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
# Remove page links
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
@ -480,7 +480,7 @@ class HTMLPreProcessor(object):
end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
end_rules.append(
# Un wrap using punctuation
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
(re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßě,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
)
for rule in self.PREPROCESS + start_rules:

View File

@ -151,13 +151,13 @@ class PreProcessor(object):
n_lookahead_open = "\s+(?!"
n_lookahead_close = ")"
default_title = r"(<[ibu][^>]*>)?\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
default_title = r"(<[ibu][^>]*>)?\s{0,3}([\w\:\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
chapter_types = [
[r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication|Preface)\s*([\d\w-]+\:?\'?\s*){0,5}", True, "Searching for common Chapter Headings"],
[r"([A-Z-]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, "Searching for letter spaced headings"], # Spaced Lettering
[r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines
[r"[^'\"]?(\d+(\.|:)|CHAPTER)\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"], # Numeric Chapters
[r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, "Searching for letter spaced headings"], # Spaced Lettering
[r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles
[r"[^'\"]?(\d+|CHAPTER)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, "Searching for simple numeric chapter headings"], # Numeric Chapters, no dot or colon
[r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters
@ -186,7 +186,7 @@ class PreProcessor(object):
def punctuation_unwrap(self, length, content, format):
# define the pieces of the regex
lookahead = "(?<=.{"+str(length)+"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
lookahead = "(?<=.{"+str(length)+"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßě,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
line_ending = "\s*</(span|p|div)>\s*(</(p|span|div)>)?"
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
@ -357,6 +357,6 @@ class PreProcessor(object):
html = blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
# Center separator lines
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)
return html

View File

@ -251,7 +251,7 @@ class Serializer(object):
tag = prefixname(elem.tag, nsrmap)
# Previous layers take care of @name
id = elem.attrib.pop('id', None)
if id is not None:
if id:
href = '#'.join((item.href, id))
offset = self.anchor_offset or buffer.tell()
self.id_offsets[urlnormalize(href)] = offset

View File

@ -34,7 +34,7 @@ class TXTInput(InputFormatPlugin):
'starts a paragraph.'
'* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'heuristic', 'markdown'],
choices=['auto', 'none', 'heuristic', 'textile', 'markdown'],
help=_('Formatting used within the document.'
'* auto: Automatically decide which formatting processor to use.\n'
'* none: Do not process the document formatting. Everything is a '

View File

@ -91,13 +91,14 @@ class AddAction(InterfaceAction):
self.gui.library_view.model().db.import_book(MetaInformation(None), [])
self.gui.library_view.model().books_added(num)
def add_isbns(self, books):
def add_isbns(self, books, add_tags=[]):
from calibre.ebooks.metadata import MetaInformation
ids = set([])
db = self.gui.library_view.model().db
for x in books:
mi = MetaInformation(None)
mi.isbn = x['isbn']
db = self.gui.library_view.model().db
if x['path'] is not None:
ids.add(db.import_book(mi, [x['path']]))
else:
@ -109,6 +110,8 @@ class AddAction(InterfaceAction):
self.gui.iactions['Edit Metadata'].do_download_metadata(ids)
finally:
config['overwrite_author_title_metadata'] = orig
if add_tags and ids:
db.bulk_modify_tags(ids, add=add_tags)
def files_dropped(self, paths):
@ -166,7 +169,7 @@ class AddAction(InterfaceAction):
from calibre.gui2.dialogs.add_from_isbn import AddFromISBN
d = AddFromISBN(self.gui)
if d.exec_() == d.Accepted:
self.add_isbns(d.books)
self.add_isbns(d.books, add_tags=d.set_tags)
def add_books(self, *args):
'''

View File

@ -593,6 +593,11 @@ class Editor(QWidget): # {{{
def code_dirtied(self, *args):
self.source_dirty = True
def hide_toolbars(self):
self.toolbar1.setVisible(False)
self.toolbar2.setVisible(False)
self.toolbar3.setVisible(False)
# }}}
if __name__ == '__main__':

View File

@ -18,6 +18,7 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ptempfile import PersistentTemporaryFile
from calibre.gui2.convert import Widget
from calibre.utils.icu import sort_key
from calibre.library.comments import comments_to_html
def create_opf_file(db, book_id):
mi = db.get_metadata(book_id, index_is_id=True)
@ -57,6 +58,7 @@ class MetadataWidget(Widget, Ui_Form):
self.initialize_metadata_options()
self.initialize_options(get_option, get_help, db, book_id)
self.connect(self.cover_button, SIGNAL("clicked()"), self.select_cover)
self.comment.hide_toolbars()
def deduce_author_sort(self, *args):
au = unicode(self.author.currentText())
@ -79,7 +81,7 @@ class MetadataWidget(Widget, Ui_Form):
self.author_sort.setText(mi.author_sort if mi.author_sort else '')
self.tags.setText(', '.join(mi.tags if mi.tags else []))
self.tags.update_items_cache(self.db.all_tags())
self.comment.setPlainText(mi.comments if mi.comments else '')
self.comment.html = comments_to_html(mi.comments) if mi.comments else ''
if mi.series:
self.series.setCurrentIndex(self.series.findText(mi.series))
if mi.series_index is not None:
@ -154,7 +156,7 @@ class MetadataWidget(Widget, Ui_Form):
author_sort = unicode(self.author_sort.text()).strip()
if author_sort:
mi.author_sort = author_sort
comments = unicode(self.comment.toPlainText()).strip()
comments = self.comment.html
if comments:
mi.comments = comments
mi.series_index = float(self.series_index.value())

View File

@ -20,30 +20,6 @@
<string>Book Cover</string>
</property>
<layout class="QGridLayout" name="_2">
<item row="0" column="0">
<layout class="QHBoxLayout" name="_3">
<item>
<widget class="ImageView" name="cover" native="true">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
</layout>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_prefer_metadata_cover">
<property name="text">
<string>Use cover from &amp;source file</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0">
<layout class="QVBoxLayout" name="_4">
<property name="spacing">
@ -95,6 +71,30 @@
</item>
</layout>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_prefer_metadata_cover">
<property name="text">
<string>Use cover from &amp;source file</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
</widget>
</item>
<item row="0" column="0">
<layout class="QHBoxLayout" name="_3">
<item>
<widget class="ImageView" name="cover" native="true">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
</layout>
</item>
</layout>
<zorder>opt_prefer_metadata_cover</zorder>
<zorder></zorder>
@ -264,35 +264,7 @@
</layout>
</item>
<item>
<widget class="QGroupBox" name="groupBox_2">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Minimum">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="maximumSize">
<size>
<width>16777215</width>
<height>200</height>
</size>
</property>
<property name="title">
<string>Comments</string>
</property>
<layout class="QGridLayout" name="_8">
<item row="0" column="0">
<widget class="QTextEdit" name="comment">
<property name="maximumSize">
<size>
<width>16777215</width>
<height>180</height>
</size>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="Editor" name="comment" native="true"/>
</item>
</layout>
</item>
@ -325,6 +297,12 @@
<header>calibre/gui2/widgets.h</header>
<container>1</container>
</customwidget>
<customwidget>
<class>Editor</class>
<extends>QWidget</extends>
<header>calibre/gui2/comments_editor.h</header>
<container>1</container>
</customwidget>
</customwidgets>
<tabstops>
<tabstop>title</tabstop>
@ -334,7 +312,6 @@
<tabstop>tags</tabstop>
<tabstop>series</tabstop>
<tabstop>series_index</tabstop>
<tabstop>comment</tabstop>
<tabstop>cover_path</tabstop>
<tabstop>cover_button</tabstop>
<tabstop>opt_prefer_metadata_cover</tabstop>

View File

@ -12,6 +12,7 @@ from PyQt4.Qt import QDialog, QApplication
from calibre.gui2.dialogs.add_from_isbn_ui import Ui_Dialog
from calibre.ebooks.metadata import check_isbn
from calibre.constants import iswindows
from calibre.gui2 import gprefs
class AddFromISBN(QDialog, Ui_Dialog):
@ -25,7 +26,9 @@ class AddFromISBN(QDialog, Ui_Dialog):
self.isbns = []
self.books = []
self.set_tags = []
self.paste_button.clicked.connect(self.paste)
self.add_tags.setText(', '.join(gprefs.get('add from ISBN tags', [])))
def paste(self, *args):
app = QApplication.instance()
@ -37,6 +40,10 @@ class AddFromISBN(QDialog, Ui_Dialog):
self.isbn_box.setPlainText(new)
def accept(self, *args):
tags = unicode(self.add_tags.text()).strip().split(',')
tags = list(filter(None, [x.strip() for x in tags]))
gprefs['add from ISBN tags'] = tags
self.set_tags = tags
for line in unicode(self.isbn_box.toPlainText()).strip().splitlines():
line = line.strip()
if not line:

View File

@ -18,8 +18,19 @@
<normaloff>:/images/add_book.png</normaloff>:/images/add_book.png</iconset>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QPlainTextEdit" name="isbn_box"/>
<item row="0" column="0" rowspan="2">
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QPlainTextEdit" name="isbn_box"/>
</item>
<item>
<widget class="QPushButton" name="paste_button">
<property name="text">
<string>&amp;Paste from clipboard</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="0" column="1">
<widget class="QLabel" name="label">
@ -34,6 +45,36 @@
</property>
</widget>
</item>
<item row="1" column="1">
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QLabel" name="label_2">
<property name="text">
<string>&amp;Tags to set on created book entries:</string>
</property>
<property name="buddy">
<cstring>add_tags</cstring>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="add_tags"/>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item row="2" column="0" colspan="2">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
@ -44,13 +85,6 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QPushButton" name="paste_button">
<property name="text">
<string>&amp;Paste from clipboard</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources>

View File

@ -49,7 +49,6 @@ def get_cover_data(path): # {{{
return cdata, area
# }}}
class MyBlockingBusy(QDialog): # {{{
do_one_signal = pyqtSignal()
@ -134,7 +133,7 @@ class MyBlockingBusy(QDialog): # {{{
do_autonumber, do_remove_format, remove_format, do_swap_ta, \
do_remove_conv, do_auto_author, series, do_series_restart, \
series_start_value, do_title_case, cover_action, clear_series, \
pubdate = self.args
pubdate, adddate = self.args
# first loop: do author and title. These will commit at the end of each
@ -214,6 +213,9 @@ class MyBlockingBusy(QDialog): # {{{
if pubdate is not None:
self.db.set_pubdate(id, pubdate, notify=False, commit=False)
if adddate is not None:
self.db.set_timestamp(id, adddate, notify=False, commit=False)
if do_series:
if do_series_restart:
if self.series_start_value is None:
@ -300,6 +302,10 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.pubdate.setSpecialValueText(_('Undefined'))
self.clear_pubdate_button.clicked.connect(self.clear_pubdate)
self.pubdate.dateChanged.connect(self.do_apply_pubdate)
self.adddate.setMinimumDate(UNDEFINED_QDATE)
self.adddate.setSpecialValueText(_('Undefined'))
self.clear_adddate_button.clicked.connect(self.clear_adddate)
self.adddate.dateChanged.connect(self.do_apply_adddate)
if len(self.db.custom_field_keys(include_composites=False)) == 0:
self.central_widget.removeTab(1)
@ -322,6 +328,12 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def clear_pubdate(self, *args):
self.pubdate.setDate(UNDEFINED_QDATE)
def do_apply_adddate(self, *args):
self.apply_adddate.setChecked(True)
def clear_adddate(self, *args):
self.adddate.setDate(UNDEFINED_QDATE)
def button_clicked(self, which):
if which == self.button_box.button(QDialogButtonBox.Apply):
self.do_again = True
@ -726,7 +738,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
name = name.strip().replace('|', ',')
self.authors.addItem(name)
self.authors.setEditText('')
self.authors.set_separator('&')
self.authors.set_space_before_sep(True)
self.authors.update_items_cache(self.db.all_author_names())
@ -805,9 +817,11 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
do_remove_conv = self.remove_conversion_settings.isChecked()
do_auto_author = self.auto_author_sort.isChecked()
do_title_case = self.change_title_to_title_case.isChecked()
pubdate = None
pubdate = adddate = None
if self.apply_pubdate.isChecked():
pubdate = qt_to_dt(self.pubdate.date())
if self.apply_adddate.isChecked():
adddate = qt_to_dt(self.adddate.date())
cover_action = None
if self.cover_remove.isChecked():
@ -821,7 +835,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
do_autonumber, do_remove_format, remove_format, do_swap_ta,
do_remove_conv, do_auto_author, series, do_series_restart,
series_start_value, do_title_case, cover_action, clear_series,
pubdate)
pubdate, adddate)
bb = MyBlockingBusy(_('Applying changes to %d books.\nPhase {0} {1}%%.')
%len(self.ids), args, self.db, self.ids,

View File

@ -347,6 +347,51 @@ from the value in the box</string>
</item>
</layout>
</item>
<item row="9" column="0">
<widget class="QLabel" name="label_10">
<property name="text">
<string>&amp;Date:</string>
</property>
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>adddate</cstring>
</property>
</widget>
</item>
<item row="9" column="1">
<layout class="QHBoxLayout" name="horizontalLayout_5">
<item>
<widget class="QDateEdit" name="adddate">
<property name="displayFormat">
<string>d MMM yyyy</string>
</property>
<property name="calendarPopup">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QToolButton" name="clear_adddate_button">
<property name="text">
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
</property>
</widget>
</item>
</layout>
</item>
<item row="9" column="2">
<widget class="QCheckBox" name="apply_adddate">
<property name="text">
<string>&amp;Apply date</string>
</property>
</widget>
</item>
<item row="10" column="0">
<widget class="QLabel" name="label_9">
<property name="text">
@ -395,6 +440,42 @@ from the value in the box</string>
</property>
</widget>
</item>
<item row="11" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>Remove &amp;format:</string>
</property>
<property name="buddy">
<cstring>remove_format</cstring>
</property>
</widget>
</item>
<item row="11" column="1">
<widget class="QComboBox" name="remove_format">
<property name="maximumSize">
<size>
<width>120</width>
<height>16777215</height>
</size>
</property>
</widget>
</item>
<item row="12" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Fixed</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>15</height>
</size>
</property>
</spacer>
</item>
<item row="13" column="0" colspan="3">
<layout class="QHBoxLayout" name="horizontalLayout_3">
<item>
@ -478,42 +559,6 @@ Future conversion of these books will use the default settings.</string>
</property>
</spacer>
</item>
<item row="12" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Fixed</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>15</height>
</size>
</property>
</spacer>
</item>
<item row="11" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>Remove &amp;format:</string>
</property>
<property name="buddy">
<cstring>remove_format</cstring>
</property>
</widget>
</item>
<item row="11" column="1">
<widget class="QComboBox" name="remove_format">
<property name="maximumSize">
<size>
<width>120</width>
<height>16777215</height>
</size>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab">

View File

@ -196,6 +196,12 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def modify_plugin(self, op=''):
index = self.plugin_view.currentIndex()
if index.isValid():
if not index.parent().isValid():
name = unicode(index.data().toString())
return error_dialog(self, _('Error'), '<p>'+
_('Select an actual plugin under <b>%s</b> to customize')%name,
show=True, show_copy_button=False)
plugin = self._plugin_model.index_to_plugin(index)
if op == 'toggle':
if not plugin.can_be_disabled:

View File

@ -485,7 +485,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
if 'calibre.ebooks.DRMError' in job.details:
if not minz:
from calibre.gui2.dialogs.drm_error import DRMErrorMessage
d = DRMErrorMessage(self, job.description.split(':')[-1])
d = DRMErrorMessage(self, _('Cannot convert') + ' ' +
job.description.split(':')[-1].partition('(')[-1][:-1])
d.setModal(False)
d.show()
self._modeless_dialogs.append(d)

View File

@ -111,7 +111,7 @@ class Kobo(Device):
id = 'kobo'
class Booq(Device):
name = 'Booq Reader'
name = 'bq Classic'
manufacturer = 'Booq'
output_profile = 'sony'
output_format = 'EPUB'
@ -125,7 +125,18 @@ class TheBook(Device):
id = 'thebook'
class Avant(Booq):
name = 'Booq Avant'
name = 'bq Avant'
class AvantXL(Booq):
name = 'bq Avant XL'
output_profile = 'ipad'
class BooqPocketPlus(Booq):
name = 'bq Pocket Plus'
output_profile = 'sony300'
class BooqCervantes(Booq):
name = 'bq Cervantes'
class Sony300(Sony505):