mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
GwR changes 2/4/10
This commit is contained in:
commit
b33c664009
@ -17,14 +17,6 @@ p.author {
|
|||||||
font-size:large;
|
font-size:large;
|
||||||
}
|
}
|
||||||
|
|
||||||
p.series {
|
|
||||||
margin-top:0em;
|
|
||||||
margin-bottom:0em;
|
|
||||||
text-align: left;
|
|
||||||
text-indent: 1em;
|
|
||||||
font-size:small;
|
|
||||||
}
|
|
||||||
|
|
||||||
p.tags {
|
p.tags {
|
||||||
margin-top:0em;
|
margin-top:0em;
|
||||||
margin-bottom:0em;
|
margin-bottom:0em;
|
||||||
@ -63,6 +55,14 @@ p.author_index {
|
|||||||
text-indent: 0em;
|
text-indent: 0em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
p.series {
|
||||||
|
text-align: left;
|
||||||
|
margin-top:0px;
|
||||||
|
margin-bottom:0px;
|
||||||
|
margin-left:2em;
|
||||||
|
text-indent:-2em;
|
||||||
|
}
|
||||||
|
|
||||||
p.read_book {
|
p.read_book {
|
||||||
text-align:left;
|
text-align:left;
|
||||||
margin-top:0px;
|
margin-top:0px;
|
||||||
|
BIN
resources/images/news/gizmodo.png
Normal file
BIN
resources/images/news/gizmodo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 640 B |
BIN
resources/images/news/newsstraitstimes.png
Normal file
BIN
resources/images/news/newsstraitstimes.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 816 B |
BIN
resources/images/news/readitlater.png
Normal file
BIN
resources/images/news/readitlater.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 810 B |
BIN
resources/images/news/tidbits.png
Normal file
BIN
resources/images/news/tidbits.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 783 B |
40
resources/recipes/gizmodo.recipe
Normal file
40
resources/recipes/gizmodo.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
gizmodo.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Gizmodo(BasicNewsRecipe):
    '''
    News recipe for gizmodo.com, built from the single "full" Gawker feed.

    All configuration is declarative; the only hook overridden is
    preprocess_html, which delegates to self.adeify_images.
    '''

    # --- descriptive metadata -------------------------------------------
    title = 'Gizmodo'
    __author__ = 'Darko Miletic'
    description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
    publisher = 'gizmodo.com'
    category = 'news, IT, Internet, gadgets'
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'

    # --- download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = True
    feeds = [
        (u'Articles', u'http://feeds.gawker.com/gizmodo/full'),
    ]

    # --- clean-up and styling -------------------------------------------
    no_stylesheets = True
    extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
    remove_attributes = ['width', 'height']
    # The 'feedflare' div is removed, and so is everything that follows it.
    remove_tags = [dict(name='div', attrs={'class': 'feedflare'})]
    remove_tags_after = dict(name='div', attrs={'class': 'feedflare'})

    # Metadata handed to the conversion step; values mirror the
    # class attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    def preprocess_html(self, soup):
        '''Return the result of running soup through self.adeify_images.'''
        adeified = self.adeify_images(soup)
        return adeified
|
||||||
|
|
35
resources/recipes/newsstraitstimes.recipe
Normal file
35
resources/recipes/newsstraitstimes.recipe
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.nst.com.my
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Newstraitstimes(BasicNewsRecipe):
    '''
    News recipe for the New Straits Times (www.nst.com.my).

    Articles are listed by the site's all-sections RSS feed and fetched
    from the site itself (use_embedded_content is False).
    '''
    title = 'New Straits Times from Malaysia'
    __author__ = 'Darko Miletic'
    description = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
    publisher = 'nst.com.my'
    category = 'news, politics, Malaysia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'

    # Metadata handed to the conversion step; values mirror the
    # class attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [dict(name=['link', 'table'])]
    # Written as a list of tag specifications, the same shape as
    # remove_tags above, instead of a bare dict.
    keep_only_tags = [dict(name='div', attrs={'id': 'haidah'})]

    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
|
||||||
|
|
@ -5,9 +5,10 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
pagina12.com.ar
|
pagina12.com.ar
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import time
|
import re, time
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
|
||||||
|
|
||||||
class Pagina12(BasicNewsRecipe):
|
class Pagina12(BasicNewsRecipe):
|
||||||
title = 'Pagina - 12'
|
title = 'Pagina - 12'
|
||||||
@ -22,7 +23,8 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
extra_css = ' body{font-family: sans-serif} '
|
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||||
|
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
@ -52,7 +54,11 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
|
rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
|
||||||
weekday = time.localtime().tm_wday
|
rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
|
||||||
return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
|
soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
|
||||||
|
for image in soup.findAll('img',alt=True):
|
||||||
|
if image['alt'].startswith('Tapa de la fecha'):
|
||||||
|
return image['src']
|
||||||
|
return None
|
||||||
|
|
64
resources/recipes/readitlater.recipe
Normal file
64
resources/recipes/readitlater.recipe
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
readitlaterlist.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Readitlater(BasicNewsRecipe):
    '''
    Recipe that turns a user's Read It Later unread list into a periodical.

    The index is scraped from the HTML page at INDEX + '/unread' after
    logging in through the form at LOGIN.
    '''
    title = 'Read It Later'
    __author__ = 'Darko Miletic'
    description = (
        'Personalized news feeds. Go to readitlaterlist.com to\n'
        'setup up your news. Fill in your account\n'
        'username, and optionally you can add password.'
    )
    publisher = 'readitlater.com'
    category = 'news, custom'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://readitlaterlist.com'
    LOGIN = INDEX + u'/l'

    feeds = [(u'Unread articles', INDEX + u'/unread')]

    def get_browser(self):
        '''
        Return a browser object, logged in when a username is set.

        The first form on the LOGIN page is filled with the username
        (field 'feed_id') and, if one was supplied, the password.
        '''
        # get_browser is an instance method, so when it is called through
        # the base class the instance has to be passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        '''
        Build the article index from the unread-list page(s).

        Returns a list of (feed title, articles) tuples, where each article
        is a dict with the keys 'title', 'date', 'url' and 'description'.
        A feed whose page has no <ul id="list"> yields an empty article
        list instead of raising.
        '''
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul', attrs={'id': 'list'})
            # soup.find returns None when the list element is absent
            # (e.g. the login did not succeed or the page layout changed).
            items = ritem.findAll('li') if ritem is not None else []
            for item in items:
                atag = item.find('a', attrs={'class': 'text'})
                if atag and atag.has_key('href'):
                    articles.append({
                        'title': self.tag_to_string(item.div),
                        # The page is not scraped for a date; the current
                        # time formatted with self.timefmt is used.
                        'date': strftime(self.timefmt),
                        'url': self.INDEX + atag['href'],
                        'description': '',
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||||
|
|
@ -1,22 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class The_Gazette(BasicNewsRecipe):
|
|
||||||
|
|
||||||
cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
|
|
||||||
title = u'The Gazette'
|
|
||||||
__author__ = 'Jerry Clapperton'
|
|
||||||
description = 'Montreal news in English'
|
|
||||||
language = 'en_CA'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 20
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
|
|
||||||
|
|
||||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
|
||||||
|
|
||||||
feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]
|
|
@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
|
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
|
||||||
@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
|
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
|
||||||
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
|
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
|
||||||
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
|
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
|
||||||
('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
|
('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
|
||||||
('World', 'http://www.tnr.com/rss/articles/World'),
|
('World', 'http://www.tnr.com/rss/articles/World'),
|
||||||
('Film', 'http://www.tnr.com/rss/articles/Film'),
|
('Film', 'http://www.tnr.com/rss/articles/Film'),
|
||||||
('Books', 'http://www.tnr.com/rss/articles/books'),
|
('Books', 'http://www.tnr.com/rss/articles/books'),
|
||||||
|
('The Book', 'http://www.tnr.com/rss/book'),
|
||||||
|
('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
|
||||||
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
|
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
|
||||||
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
|
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
|
||||||
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
|
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
|
||||||
('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
|
|
||||||
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
|
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
|
||||||
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
|
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
|
||||||
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
|
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
|
||||||
@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')
|
return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')
|
||||||
|
|
||||||
|
53
resources/recipes/tidbits.recipe
Normal file
53
resources/recipes/tidbits.recipe
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
db.tidbits.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TidBITS(BasicNewsRecipe):
    '''
    News recipe for TidBITS (db.tidbits.com), one feed per site category.

    Purely declarative: no BasicNewsRecipe hooks are overridden.
    '''

    # --- descriptive metadata -------------------------------------------
    title = 'TidBITS: Mac News for the Rest of Us'
    __author__ = 'Darko Miletic'
    description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
    publisher = 'TidBITS Publishing Inc.'
    category = 'news, Apple, Macintosh, IT, Internet'
    language = 'en'
    masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'

    # --- download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = True
    remove_empty_feeds = True

    # --- clean-up and styling -------------------------------------------
    no_stylesheets = True
    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '
    remove_attributes = ['width', 'height']
    # <small> elements are removed, and so is everything after one.
    remove_tags = [dict(name='small')]
    remove_tags_after = dict(name='small')

    # Metadata handed to the conversion step; values mirror the
    # class attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Business Apps', u'http://db.tidbits.com/feeds/business.rss'),
        (u'Entertainment', u'http://db.tidbits.com/feeds/entertainment.rss'),
        (u'External Links', u'http://db.tidbits.com/feeds/links.rss'),
        (u'Home Mac', u'http://db.tidbits.com/feeds/home.rss'),
        (u'Inside TidBITS', u'http://db.tidbits.com/feeds/inside.rss'),
        (u'iPod & iPhone', u'http://db.tidbits.com/feeds/ipod-iphone.rss'),
        (u'Just for Fun', u'http://db.tidbits.com/feeds/fun.rss'),
        (u'Macs & Mac OS X', u'http://db.tidbits.com/feeds/macs.rss'),
        (u'Media Creation', u'http://db.tidbits.com/feeds/creative.rss'),
        (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'),
        (u'Opinion & Editorial', u'http://db.tidbits.com/feeds/opinion.rss'),
        (u'Support & Problem Solving', u'http://db.tidbits.com/feeds/support.rss'),
        (u'Safe Computing', u'http://db.tidbits.com/feeds/security.rss'),
        (u'Tech News', u'http://db.tidbits.com/feeds/tech.rss'),
        (u'Software Watchlist', u'http://db.tidbits.com/feeds/watchlist.rss'),
    ]
|
@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
# first, check if there is an h3 tag which provides a section name
|
# first, check if there is an h3 tag which provides a section name
|
||||||
stag = divtag.find('h3')
|
stag = divtag.find('h3')
|
||||||
if stag:
|
if stag:
|
||||||
if stag.parent['class'] == 'dynamic':
|
if stag.parent.get('class', '') == 'dynamic':
|
||||||
# a carousel of articles is too complex to extract a section name
|
# a carousel of articles is too complex to extract a section name
|
||||||
# for each article, so we'll just call the section "Carousel"
|
# for each article, so we'll just call the section "Carousel"
|
||||||
section_name = 'Carousel'
|
section_name = 'Carousel'
|
||||||
|
@ -42,7 +42,7 @@ class Writer(FormatWriter):
|
|||||||
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
||||||
|
|
||||||
text, text_sizes = self._text(pml)
|
text, text_sizes = self._text(pml)
|
||||||
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4)="(?P<text>.+?)"', pml)
|
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
|
||||||
chapter_index += self.index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
|
chapter_index += self.index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
|
||||||
chapter_index += self.index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
|
chapter_index += self.index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
|
||||||
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
|
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
|
||||||
|
@ -262,7 +262,6 @@ class Region(object):
|
|||||||
max_lines = max(max_lines, len(c))
|
max_lines = max(max_lines, len(c))
|
||||||
return max_lines
|
return max_lines
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_small(self):
|
def is_small(self):
|
||||||
return self.line_count < 3
|
return self.line_count < 3
|
||||||
@ -438,9 +437,8 @@ class Page(object):
|
|||||||
# absorb into a neighboring region (prefer the one with number of cols
|
# absorb into a neighboring region (prefer the one with number of cols
|
||||||
# closer to the avg number of cols in the set, if equal use larger
|
# closer to the avg number of cols in the set, if equal use larger
|
||||||
# region)
|
# region)
|
||||||
# merge contiguous regions that can contain each other
|
|
||||||
'''absorbed = set([])
|
|
||||||
found = True
|
found = True
|
||||||
|
absorbed = set([])
|
||||||
while found:
|
while found:
|
||||||
found = False
|
found = False
|
||||||
for i, region in enumerate(self.regions):
|
for i, region in enumerate(self.regions):
|
||||||
@ -452,10 +450,33 @@ class Page(object):
|
|||||||
regions.append(self.regions[j])
|
regions.append(self.regions[j])
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
prev = None if i == 0 else i-1
|
prev_region = None if i == 0 else i-1
|
||||||
next = j if self.regions[j] not in regions else None
|
next_region = j if self.regions[j] not in regions else None
|
||||||
'''
|
if prev_region is None and next_region is not None:
|
||||||
pass
|
absorb_into = next_region
|
||||||
|
elif next_region is None and prev_region is not None:
|
||||||
|
absorb_into = prev_region
|
||||||
|
elif prev_region is None and next_region is None:
|
||||||
|
if len(regions) > 1:
|
||||||
|
absorb_into = regions[0]
|
||||||
|
regions = regions[1:]
|
||||||
|
else:
|
||||||
|
absorb_into = None
|
||||||
|
else:
|
||||||
|
absorb_into = prev_region
|
||||||
|
if next_region.line_count >= prev_region.line_count:
|
||||||
|
avg_column_count = sum([len(r.columns) for r in
|
||||||
|
regions])/float(len(regions))
|
||||||
|
if next_region.line_count > prev_region.line_count \
|
||||||
|
or abs(avg_column_count - len(prev_region.columns)) \
|
||||||
|
> abs(avg_column_count - len(next_region.columns)):
|
||||||
|
absorb_into = next_region
|
||||||
|
if absorb_into is not None:
|
||||||
|
absorb_into.absorb_region(regions)
|
||||||
|
absorbed.update(regions)
|
||||||
|
i = j
|
||||||
|
for region in absorbed:
|
||||||
|
self.regions.remove(region)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -57,7 +57,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
|
|||||||
setattr(opts,option, fmt_options[option])
|
setattr(opts,option, fmt_options[option])
|
||||||
|
|
||||||
# Fetch and run the plugin for fmt
|
# Fetch and run the plugin for fmt
|
||||||
|
# Returns 0 if successful, 1 if no catalog built
|
||||||
plugin = plugin_for_catalog_format(fmt)
|
plugin = plugin_for_catalog_format(fmt)
|
||||||
plugin.run(out_file_name, opts, db, notification=notification)
|
return plugin.run(out_file_name, opts, db, notification=notification)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1394,6 +1394,11 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
||||||
|
|
||||||
def catalog_generated(self, job):
|
def catalog_generated(self, job):
|
||||||
|
if job.result:
|
||||||
|
# Search terms nulled catalog results
|
||||||
|
return error_dialog(self, _('No books found'),
|
||||||
|
_("No books to catalog\nCheck exclude tags"),
|
||||||
|
show=True)
|
||||||
if job.failed:
|
if job.failed:
|
||||||
return self.job_exception(job)
|
return self.job_exception(job)
|
||||||
id = self.library_view.model().add_catalog(job.catalog_file_path, job.catalog_title)
|
id = self.library_view.model().add_catalog(job.catalog_file_path, job.catalog_title)
|
||||||
|
@ -996,25 +996,21 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
# Return -1 if x<y
|
# Return -1 if x<y
|
||||||
# Return 0 if x==y
|
# Return 0 if x==y
|
||||||
# Return 1 if x>y
|
# Return 1 if x>y
|
||||||
#print "x['author_sort']: %s y['author_sort']: %s" % (x['author_sort'],y['author_sort'])
|
# Different authors - sort by author_sort
|
||||||
if x['author_sort'] > y['author_sort']:
|
if x['author_sort'] > y['author_sort']:
|
||||||
return 1
|
return 1
|
||||||
elif x['author_sort'] < y['author_sort']:
|
elif x['author_sort'] < y['author_sort']:
|
||||||
return -1
|
return -1
|
||||||
else:
|
else:
|
||||||
# Authors equal
|
# Same author
|
||||||
# Books w/o series go first
|
if x['series'] != y['series']:
|
||||||
if x['series'] > y['series']:
|
# Different series
|
||||||
return 1
|
if x['title_sort'].lstrip() > y['title_sort'].lstrip():
|
||||||
elif x['series'] < y['series']:
|
|
||||||
return -1
|
|
||||||
elif not x['series'] and not y['series']:
|
|
||||||
if self.generateSortTitle(x['title']) > self.generateSortTitle(y['title']):
|
|
||||||
return 1
|
return 1
|
||||||
else:
|
else:
|
||||||
return -1
|
return -1
|
||||||
else:
|
else:
|
||||||
# Both books have series
|
# Same series
|
||||||
if x['series'] == y['series']:
|
if x['series'] == y['series']:
|
||||||
if float(x['series_index']) > float(y['series_index']):
|
if float(x['series_index']) > float(y['series_index']):
|
||||||
return 1
|
return 1
|
||||||
@ -1041,12 +1037,14 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
|
|
||||||
if False and self.verbose:
|
if False and self.verbose:
|
||||||
self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
|
self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
|
||||||
self.opts.log.info(" %-40s %-20s %s" % ('title', 'series', 'series_index'))
|
self.opts.log.info(" %-30s %-20s %s" % ('title', 'title_sort','series', 'series_index'))
|
||||||
for title in self.booksByAuthor:
|
for title in self.booksByAuthor:
|
||||||
self.opts.log.info((u" %-40s %-20s %s" % \
|
self.opts.log.info((u" %-30s %-20s %-20s%5s " % \
|
||||||
(title['title'][0:40],
|
(title['title'][:30],
|
||||||
title['series'][0:20] if title['series'] else '',
|
title['series'][:20] if title['series'] else '',
|
||||||
title['series_index'])).encode('utf-8'))
|
title['series_index'],
|
||||||
|
)).encode('utf-8'))
|
||||||
|
raise SystemExit
|
||||||
|
|
||||||
# Build the unique_authors set from existing data
|
# Build the unique_authors set from existing data
|
||||||
authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor]
|
authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor]
|
||||||
@ -1396,8 +1394,9 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
dtc = 0
|
dtc = 0
|
||||||
current_letter = ""
|
current_letter = ""
|
||||||
current_author = ""
|
current_author = ""
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Loop through books_by_author
|
# Loop through booksByAuthor
|
||||||
book_count = 0
|
book_count = 0
|
||||||
for book in self.booksByAuthor:
|
for book in self.booksByAuthor:
|
||||||
book_count += 1
|
book_count += 1
|
||||||
@ -1435,11 +1434,23 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pAuthorTag)
|
divTag.insert(dtc,pAuthorTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
|
|
||||||
|
# Check for series
|
||||||
|
if book['series'] and book['series'] != current_series:
|
||||||
|
# Start a new series
|
||||||
|
current_series = book['series']
|
||||||
|
pSeriesTag = Tag(soup,'p')
|
||||||
|
pSeriesTag['class'] = "series"
|
||||||
|
pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + book['series']))
|
||||||
|
divTag.insert(dtc,pSeriesTag)
|
||||||
|
dtc += 1
|
||||||
|
if current_series and not book['series']:
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Add books
|
# Add books
|
||||||
pBookTag = Tag(soup, "p")
|
pBookTag = Tag(soup, "p")
|
||||||
ptc = 0
|
ptc = 0
|
||||||
|
|
||||||
# Prefix book with read/unread symbol
|
# book with read/unread symbol
|
||||||
if book['read']:
|
if book['read']:
|
||||||
# check mark
|
# check mark
|
||||||
pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
|
pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
|
||||||
@ -1454,6 +1465,9 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
aTag = Tag(soup, "a")
|
aTag = Tag(soup, "a")
|
||||||
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
||||||
# Use series, series index if avail else just title
|
# Use series, series index if avail else just title
|
||||||
|
if current_series:
|
||||||
|
aTag.insert(0,escape(book['title'][len(book['series'])+1:]))
|
||||||
|
else:
|
||||||
aTag.insert(0,escape(book['title']))
|
aTag.insert(0,escape(book['title']))
|
||||||
pBookTag.insert(ptc, aTag)
|
pBookTag.insert(ptc, aTag)
|
||||||
ptc += 1
|
ptc += 1
|
||||||
@ -1506,6 +1520,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pIndexTag)
|
divTag.insert(dtc,pIndexTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
current_author = None
|
current_author = None
|
||||||
|
current_series = None
|
||||||
|
|
||||||
for new_entry in this_months_list:
|
for new_entry in this_months_list:
|
||||||
if new_entry['author'] != current_author:
|
if new_entry['author'] != current_author:
|
||||||
@ -1522,6 +1537,18 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pAuthorTag)
|
divTag.insert(dtc,pAuthorTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
|
|
||||||
|
# Check for series
|
||||||
|
if new_entry['series'] and new_entry['series'] != current_series:
|
||||||
|
# Start a new series
|
||||||
|
current_series = new_entry['series']
|
||||||
|
pSeriesTag = Tag(soup,'p')
|
||||||
|
pSeriesTag['class'] = "series"
|
||||||
|
pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + new_entry['series']))
|
||||||
|
divTag.insert(dtc,pSeriesTag)
|
||||||
|
dtc += 1
|
||||||
|
if current_series and not new_entry['series']:
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Add books
|
# Add books
|
||||||
pBookTag = Tag(soup, "p")
|
pBookTag = Tag(soup, "p")
|
||||||
ptc = 0
|
ptc = 0
|
||||||
@ -1540,6 +1567,9 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
|
|
||||||
aTag = Tag(soup, "a")
|
aTag = Tag(soup, "a")
|
||||||
aTag['href'] = "book_%d.html" % (int(float(new_entry['id'])))
|
aTag['href'] = "book_%d.html" % (int(float(new_entry['id'])))
|
||||||
|
if current_series:
|
||||||
|
aTag.insert(0,escape(new_entry['title'][len(new_entry['series'])+1:]))
|
||||||
|
else:
|
||||||
aTag.insert(0,escape(new_entry['title']))
|
aTag.insert(0,escape(new_entry['title']))
|
||||||
pBookTag.insert(ptc, aTag)
|
pBookTag.insert(ptc, aTag)
|
||||||
ptc += 1
|
ptc += 1
|
||||||
@ -1641,6 +1671,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
this_book['author_sort'] = book['author_sort']
|
this_book['author_sort'] = book['author_sort']
|
||||||
this_book['read'] = book['read']
|
this_book['read'] = book['read']
|
||||||
this_book['id'] = book['id']
|
this_book['id'] = book['id']
|
||||||
|
this_book['series'] = book['series']
|
||||||
normalized_tag = self.genre_tags_dict[friendly_tag]
|
normalized_tag = self.genre_tags_dict[friendly_tag]
|
||||||
genre_tag_list = [key for genre in genre_list for key in genre]
|
genre_tag_list = [key for genre in genre_list for key in genre]
|
||||||
if normalized_tag in genre_tag_list:
|
if normalized_tag in genre_tag_list:
|
||||||
@ -2578,6 +2609,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
dtc = 0
|
dtc = 0
|
||||||
|
|
||||||
current_author = ''
|
current_author = ''
|
||||||
|
current_series = None
|
||||||
for book in books:
|
for book in books:
|
||||||
if book['author'] != current_author:
|
if book['author'] != current_author:
|
||||||
# Start a new author with link
|
# Start a new author with link
|
||||||
@ -2593,6 +2625,19 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pAuthorTag)
|
divTag.insert(dtc,pAuthorTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
|
|
||||||
|
# Check for series
|
||||||
|
if book['series'] and book['series'] != current_series:
|
||||||
|
# Start a new series
|
||||||
|
current_series = book['series']
|
||||||
|
pSeriesTag = Tag(soup,'p')
|
||||||
|
pSeriesTag['class'] = "series"
|
||||||
|
pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + book['series']))
|
||||||
|
divTag.insert(dtc,pSeriesTag)
|
||||||
|
dtc += 1
|
||||||
|
|
||||||
|
if current_series and not book['series']:
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Add books
|
# Add books
|
||||||
pBookTag = Tag(soup, "p")
|
pBookTag = Tag(soup, "p")
|
||||||
ptc = 0
|
ptc = 0
|
||||||
@ -2609,6 +2654,10 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
# Add the book title
|
# Add the book title
|
||||||
aTag = Tag(soup, "a")
|
aTag = Tag(soup, "a")
|
||||||
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
||||||
|
# Use series, series index if avail else just title
|
||||||
|
if current_series:
|
||||||
|
aTag.insert(0,escape(book['title'][len(book['series'])+1:]))
|
||||||
|
else:
|
||||||
aTag.insert(0,escape(book['title']))
|
aTag.insert(0,escape(book['title']))
|
||||||
pBookTag.insert(ptc, aTag)
|
pBookTag.insert(ptc, aTag)
|
||||||
ptc += 1
|
ptc += 1
|
||||||
@ -2879,9 +2928,41 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
return char
|
return char
|
||||||
|
|
||||||
def markdownComments(self, comments):
|
def markdownComments(self, comments):
|
||||||
''' Convert random comment text to normalized, xml-legal block of <p>s'''
|
'''
|
||||||
|
Convert random comment text to normalized, xml-legal block of <p>s
|
||||||
|
'plain text' returns as
|
||||||
|
<p>plain text</p>
|
||||||
|
|
||||||
comments = comments.replace('\r', '')
|
'plain text with <i>minimal</i> <b>markup</b>' returns as
|
||||||
|
<p>plain text with <i>minimal</i> <b>markup</b></p>
|
||||||
|
|
||||||
|
'<p>pre-formatted text</p> returns untouched
|
||||||
|
|
||||||
|
'A line of text\n\nFollowed by a line of text' returns as
|
||||||
|
<p>A line of text</p>
|
||||||
|
<p>Followed by a line of text</p>
|
||||||
|
|
||||||
|
'A line of text.\nA second line of text.\rA third line of text' returns as
|
||||||
|
<p>A line of text.<br />A second line of text.<br />A third line of text.</p>
|
||||||
|
|
||||||
|
'...end of a paragraph.Somehow the break was lost...' returns as
|
||||||
|
<p>...end of a paragraph.</p>
|
||||||
|
<p>Somehow the break was lost...</p>
|
||||||
|
|
||||||
|
Deprecated HTML returns as HTML via BeautifulSoup()
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Explode lost CRs to \n\n
|
||||||
|
# Hackish - ignoring sentences ending or beginning in numbers to avoid
|
||||||
|
# confusion with decimal points.
|
||||||
|
for lost_cr in re.finditer('([a-z])([\.\?!])([A-Z])',comments):
|
||||||
|
comments = comments.replace(lost_cr.group(),
|
||||||
|
'%s%s\n\n%s' % (lost_cr.group(1),
|
||||||
|
lost_cr.group(2),
|
||||||
|
lost_cr.group(3)))
|
||||||
|
|
||||||
|
# Convert \n\n to <p>s
|
||||||
if re.search('\n\n', comments):
|
if re.search('\n\n', comments):
|
||||||
soup = BeautifulSoup()
|
soup = BeautifulSoup()
|
||||||
split_ps = comments.split('\n\n')
|
split_ps = comments.split('\n\n')
|
||||||
@ -2891,7 +2972,11 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
pTag.insert(0,p)
|
pTag.insert(0,p)
|
||||||
soup.insert(tsc,pTag)
|
soup.insert(tsc,pTag)
|
||||||
tsc += 1
|
tsc += 1
|
||||||
else:
|
comments = soup.renderContents()
|
||||||
|
|
||||||
|
# Convert solo returns to <br />
|
||||||
|
comments = re.sub('[\r\n]','<br />', comments)
|
||||||
|
|
||||||
soup = BeautifulSoup(comments)
|
soup = BeautifulSoup(comments)
|
||||||
|
|
||||||
result = BeautifulSoup()
|
result = BeautifulSoup()
|
||||||
@ -3001,6 +3086,12 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
'CLI' if opts.cli_environment else 'GUI'))
|
'CLI' if opts.cli_environment else 'GUI'))
|
||||||
if opts_dict['ids']:
|
if opts_dict['ids']:
|
||||||
log(" Book count: %d" % len(opts_dict['ids']))
|
log(" Book count: %d" % len(opts_dict['ids']))
|
||||||
|
|
||||||
|
# If exclude_genre is blank, assume user wants all genre tags included
|
||||||
|
if opts.exclude_genre.strip() == '':
|
||||||
|
opts.exclude_genre = '\[^.\]'
|
||||||
|
log(" converting empty exclude_genre to '\[^.\]'")
|
||||||
|
|
||||||
# Display opts
|
# Display opts
|
||||||
keys = opts_dict.keys()
|
keys = opts_dict.keys()
|
||||||
keys.sort()
|
keys.sort()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user