mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
3afcb3b2a8
@ -27,7 +27,7 @@ p.tags {
|
|||||||
|
|
||||||
p.description {
|
p.description {
|
||||||
text-align:left;
|
text-align:left;
|
||||||
font-style:italic;
|
font-style:normal;
|
||||||
margin-top: 0em;
|
margin-top: 0em;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,6 +55,14 @@ p.author_index {
|
|||||||
text-indent: 0em;
|
text-indent: 0em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
p.series {
|
||||||
|
text-align: left;
|
||||||
|
margin-top:0px;
|
||||||
|
margin-bottom:0px;
|
||||||
|
margin-left:2em;
|
||||||
|
text-indent:-2em;
|
||||||
|
}
|
||||||
|
|
||||||
p.read_book {
|
p.read_book {
|
||||||
text-align:left;
|
text-align:left;
|
||||||
margin-top:0px;
|
margin-top:0px;
|
||||||
|
BIN
resources/images/news/digitalspy_uk.png
Normal file
BIN
resources/images/news/digitalspy_uk.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.3 KiB |
BIN
resources/images/news/elcomercio.png
Normal file
BIN
resources/images/news/elcomercio.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 764 B |
BIN
resources/images/news/gizmodo.png
Normal file
BIN
resources/images/news/gizmodo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 640 B |
BIN
resources/images/news/newsstraitstimes.png
Normal file
BIN
resources/images/news/newsstraitstimes.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 816 B |
BIN
resources/images/news/readitlater.png
Normal file
BIN
resources/images/news/readitlater.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 810 B |
BIN
resources/images/news/tidbits.png
Normal file
BIN
resources/images/news/tidbits.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 783 B |
45
resources/recipes/ZIVE.sk.recipe
Normal file
45
resources/recipes/ZIVE.sk.recipe
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ZiveRecipe(BasicNewsRecipe):
    """Recipe for ZIVE.sk, a Slovak-language site about computers, IT and
    the internet (see ``description``)."""

    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    language = 'sk'
    version = 1

    title = u'ZIVE.sk'
    publisher = u''
    category = u'News, Newspaper'
    description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'

    # A single feed carrying all articles.
    feeds = [
        (u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'),
    ]

    preprocess_regexps = [
        # Drop the "Pokra...ie" paragraph. The quantifier is non-greedy
        # because with re.DOTALL a greedy ``.*`` would run to the LAST
        # ``ie</strong></p>`` in the page and delete everything in between.
        (re.compile(r'<p><p><strong>Pokra.*?ie</strong></p>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    remove_tags = []

    # Keep only the headline, the author line and the article body.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='span', attrs={'class':'arlist-data-info-author'}),
        dict(name='div', attrs={'class':'bbtext font-resizer-area'}),
    ]

    extra_css = '''
        h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
        h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
        '''
|
||||||
|
|
||||||
|
|
43
resources/recipes/digitalspy_uk.recipe
Normal file
43
resources/recipes/digitalspy_uk.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.digitalspy.co.uk
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class DigitalSpyUK(BasicNewsRecipe):
    """Recipe that downloads the UK-zone feeds of www.digitalspy.co.uk."""

    # --- descriptive metadata -------------------------------------------
    title = 'Digital Spy - UK Edition'
    __author__ = 'Darko Miletic'
    description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
    publisher = 'Digital Spy Limited.'
    category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
    language = 'en_GB'

    # --- download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    encoding = 'cp1252'

    # --- presentation ---------------------------------------------------
    no_stylesheets = True
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- page clean-up --------------------------------------------------
    remove_tags = [{'name': ['link']}]
    remove_attributes = ['height', 'width']
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'article'}}]

    # (section title, RSS url) pairs
    feeds = [
        (u'News', u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml'),
        (u'Big Brother', u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml'),
        (u'Entertainment', u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml'),
        (u'General', u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml'),
        (u'Media', u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml'),
    ]
|
||||||
|
|
38
resources/recipes/elcomercio.recipe
Normal file
38
resources/recipes/elcomercio.recipe
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
elcomercio.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ElComercio(BasicNewsRecipe):
    """Recipe for elcomercio.com, using the content embedded in its RSS feed."""

    title = 'El Comercio '
    __author__ = 'Darko Miletic'
    # The original text here was the Gizmodo blurb, copy-pasted from another
    # recipe; it is replaced with a description matching this recipe's own
    # category and publisher.
    description = 'El Comercio - news and politics from Ecuador'
    publisher = 'GRUPO EL COMERCIO C.A.'
    category = 'news, Ecuador, politics'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'es'
    masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_attributes = ['width', 'height']

    feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``self.adeify_images``."""
        return self.adeify_images(soup)
|
||||||
|
|
40
resources/recipes/gizmodo.recipe
Normal file
40
resources/recipes/gizmodo.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
gizmodo.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Gizmodo(BasicNewsRecipe):
    """Recipe for gizmodo.com, built from the content embedded in its feed."""

    # --- descriptive metadata -------------------------------------------
    title = 'Gizmodo'
    __author__ = 'Darko Miletic'
    description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
    publisher = 'gizmodo.com'
    category = 'news, IT, Internet, gadgets'
    language = 'en'

    # --- download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = True
    encoding = 'utf-8'

    # --- presentation ---------------------------------------------------
    no_stylesheets = True
    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
    extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- page clean-up --------------------------------------------------
    remove_attributes = ['width', 'height']
    # The "feedflare" div is both removed and used as the cut-off marker.
    remove_tags = [{'name': 'div', 'attrs': {'class': 'feedflare'}}]
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'feedflare'}}

    feeds = [
        (u'Articles', u'http://feeds.gawker.com/gizmodo/full'),
    ]

    def preprocess_html(self, soup):
        """Hand *soup* to ``adeify_images`` and return what it gives back."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
||||||
|
|
@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe):
|
|||||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||||
'mailingListTout', 'partnerCenter', 'pageFooter']),
|
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||||
|
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||||
dict(name='iframe')]
|
dict(name='iframe')]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
||||||
|
47
resources/recipes/iliteratura_cz.recipe
Normal file
47
resources/recipes/iliteratura_cz.recipe
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class SmeRecipe(BasicNewsRecipe):
    """Recipe for iLiteratura.cz.

    NOTE(review): the class name looks copied from another recipe; it is
    left unchanged so that anything referring to it keeps working.
    """

    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    # 'cs' is the ISO 639-1 language code for Czech; 'cz' is the country
    # code and is not a valid language code.
    language = 'cs'
    version = 1

    title = u'iLiteratura.cz'
    publisher = u''
    category = u'News, Newspaper'
    description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA'
    cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp'),
    ]

    keep_only_tags = []

    remove_tags = [dict(name='table'), dict(name='h3')]

    preprocess_regexps = [
        # Deliberately greedy: cut everything from the "Souvisej..." heading
        # through the closing </body> tag.
        (re.compile(r'<h3>Souvisej.*</body>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        The numeric article id is taken from the digits following ``ID=`` in
        *url*. If *url* carries no such id it is returned unchanged instead
        of raising ``AttributeError`` on a failed match.
        """
        m = re.search(r'(?<=ID=)[0-9]+', url)
        if m is None:
            return url
        return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + m.group(0) + '&c=tisk'

    extra_css = '''
        h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
        h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
        '''
|
@ -4,7 +4,7 @@ class Metro_Montreal(BasicNewsRecipe):
|
|||||||
|
|
||||||
title = u'M\xe9tro Montr\xe9al'
|
title = u'M\xe9tro Montr\xe9al'
|
||||||
__author__ = 'Jerry Clapperton'
|
__author__ = 'Jerry Clapperton'
|
||||||
description = 'Le quotidien le plus branché sur le monde'
|
description = 'Le quotidien le plus branch\xe9 sur le monde'
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
@ -16,7 +16,7 @@ class Metro_Montreal(BasicNewsRecipe):
|
|||||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'id':'buttons'})]
|
remove_tags = [dict(attrs={'id':'buttons'})]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u"L'info", u'http://journalmetro.com/linfo/rss'),
|
(u"L'info", u'http://journalmetro.com/linfo/rss'),
|
||||||
(u'Monde', u'http://journalmetro.com/monde/rss'),
|
(u'Monde', u'http://journalmetro.com/monde/rss'),
|
||||||
@ -26,4 +26,4 @@ class Metro_Montreal(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('article', 'ArticlePrint') + '?language=fr'
|
return url.replace('article', 'ArticlePrint') + '?language=fr'
|
||||||
|
35
resources/recipes/newsstraitstimes.recipe
Normal file
35
resources/recipes/newsstraitstimes.recipe
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.nst.com.my
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Newstraitstimes(BasicNewsRecipe):
    """Recipe for the New Straits Times (www.nst.com.my)."""

    title = 'New Straits Times from Malaysia'
    __author__ = 'Darko Miletic'
    description = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
    publisher = 'nst.com.my'
    category = 'news, politics, Malaysia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [dict(name=['link', 'table'])]
    # A list of tag specs, like remove_tags and the other recipes; the
    # original assigned a bare dict here.
    keep_only_tags = [dict(name='div', attrs={'id':'haidah'})]

    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
|
||||||
|
|
@ -1,13 +1,12 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
pagina12.com.ar
|
pagina12.com.ar
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import time
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class Pagina12(BasicNewsRecipe):
|
class Pagina12(BasicNewsRecipe):
|
||||||
title = 'Pagina - 12'
|
title = 'Pagina - 12'
|
||||||
@ -22,7 +21,8 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
extra_css = ' body{font-family: sans-serif} '
|
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||||
|
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
@ -52,7 +52,11 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
|
rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
|
||||||
weekday = time.localtime().tm_wday
|
rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
|
||||||
return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
|
soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
|
||||||
|
for image in soup.findAll('img',alt=True):
|
||||||
|
if image['alt'].startswith('Tapa de la fecha'):
|
||||||
|
return image['src']
|
||||||
|
return None
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ class PeopleMag(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class': 'panel_news_article_main'}),
|
dict(name='div', attrs={'class': 'panel_news_article_main'}),
|
||||||
dict(name='div', attrs={'class':'article_content'}),
|
dict(name='div', attrs={'class':'article_content'}),
|
||||||
dict(name='div', attrs={'class': 'headline'}),
|
dict(name='div', attrs={'class': 'headline'}),
|
||||||
dict(name='div', attrs={'class': 'post'}),
|
dict(name='div', attrs={'class': 'post'}),
|
||||||
@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class':'sharelinkcont'}),
|
dict(name='div', attrs={'class':'sharelinkcont'}),
|
||||||
dict(name='div', attrs={'class':'categories'}),
|
dict(name='div', attrs={'class':'categories'}),
|
||||||
dict(name='ul', attrs={'class':'categories'}),
|
dict(name='ul', attrs={'class':'categories'}),
|
||||||
|
dict(name='div', attrs={'class':'related_content'}),
|
||||||
dict(name='div', attrs={'id':'promo'}),
|
dict(name='div', attrs={'id':'promo'}),
|
||||||
dict(name='div', attrs={'class':'linksWrapper'}),
|
dict(name='div', attrs={'class':'linksWrapper'}),
|
||||||
dict(name='p', attrs={'class':'tag tvnews'}),
|
dict(name='p', attrs={'class':'tag tvnews'}),
|
||||||
|
64
resources/recipes/readitlater.recipe
Normal file
64
resources/recipes/readitlater.recipe
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
readitlaterlist.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Readitlater(BasicNewsRecipe):
    """Recipe that turns a Read It Later "unread" list into a periodical.

    The index page is scraped directly (see ``parse_index``) after an
    optional login performed in ``get_browser``.
    """

    title = 'Read It Later'
    __author__ = 'Darko Miletic'
    description = '''Personalized news feeds. Go to readitlaterlist.com to
                     set up your news. Fill in your account
                     username, and optionally you can add password.'''
    publisher = 'readitlater.com'
    category = 'news, custom'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://readitlaterlist.com'
    LOGIN = INDEX + u'/l'

    feeds = [(u'Unread articles', INDEX + u'/unread')]

    def get_browser(self):
        """Return a browser, submitting the login form first when a
        username has been supplied. The password is filled in only if set."""
        # NOTE(review): called without ``self``; presumably get_browser is a
        # classmethod on the base class - confirm before changing.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the article index by scraping each feed page.

        Returns a list of ``(feed title, articles)`` tuples, where each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys.
        """
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul', attrs={'id':'list'})
            # The list element is missing when the page has an unexpected
            # shape; treat that as "no articles" instead of crashing with
            # AttributeError on None.
            items = ritem.findAll('li') if ritem is not None else []
            for item in items:
                description = ''
                atag = item.find('a', attrs={'class':'text'})
                if atag and atag.has_key('href'):
                    url = self.INDEX + atag['href']
                    title = self.tag_to_string(item.div)
                    date = strftime(self.timefmt)
                    articles.append({
                        'title': title,
                        'date': date,
                        'url': url,
                        'description': description,
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||||
|
|
@ -1,22 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class The_Gazette(BasicNewsRecipe):
|
|
||||||
|
|
||||||
cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
|
|
||||||
title = u'The Gazette'
|
|
||||||
__author__ = 'Jerry Clapperton'
|
|
||||||
description = 'Montreal news in English'
|
|
||||||
language = 'en_CA'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 20
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
|
|
||||||
|
|
||||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
|
||||||
|
|
||||||
feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]
|
|
@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
|
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
|
||||||
@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
|
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
|
||||||
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
|
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
|
||||||
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
|
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
|
||||||
('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
|
('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
|
||||||
('World', 'http://www.tnr.com/rss/articles/World'),
|
('World', 'http://www.tnr.com/rss/articles/World'),
|
||||||
('Film', 'http://www.tnr.com/rss/articles/Film'),
|
('Film', 'http://www.tnr.com/rss/articles/Film'),
|
||||||
('Books', 'http://www.tnr.com/rss/articles/books'),
|
('Books', 'http://www.tnr.com/rss/articles/books'),
|
||||||
|
('The Book', 'http://www.tnr.com/rss/book'),
|
||||||
|
('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
|
||||||
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
|
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
|
||||||
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
|
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
|
||||||
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
|
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
|
||||||
('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
|
|
||||||
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
|
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
|
||||||
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
|
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
|
||||||
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
|
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
|
||||||
@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')
|
return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')
|
||||||
|
|
||||||
|
53
resources/recipes/tidbits.recipe
Normal file
53
resources/recipes/tidbits.recipe
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
db.tidbits.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TidBITS(BasicNewsRecipe):
    """Recipe for db.tidbits.com, built from the content embedded in its feeds."""

    # --- descriptive metadata -------------------------------------------
    title = 'TidBITS: Mac News for the Rest of Us'
    __author__ = 'Darko Miletic'
    description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
    publisher = 'TidBITS Publishing Inc.'
    category = 'news, Apple, Macintosh, IT, Internet'
    language = 'en'

    # --- download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = True
    remove_empty_feeds = True
    encoding = 'utf-8'

    # --- presentation ---------------------------------------------------
    no_stylesheets = True
    masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'
    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- page clean-up --------------------------------------------------
    remove_attributes = ['width', 'height']
    # The <small> element is both removed and used as the cut-off marker.
    remove_tags = [{'name': 'small'}]
    remove_tags_after = {'name': 'small'}

    # (section title, RSS url) pairs
    feeds = [
        (u'Business Apps', u'http://db.tidbits.com/feeds/business.rss'),
        (u'Entertainment', u'http://db.tidbits.com/feeds/entertainment.rss'),
        (u'External Links', u'http://db.tidbits.com/feeds/links.rss'),
        (u'Home Mac', u'http://db.tidbits.com/feeds/home.rss'),
        (u'Inside TidBITS', u'http://db.tidbits.com/feeds/inside.rss'),
        (u'iPod & iPhone', u'http://db.tidbits.com/feeds/ipod-iphone.rss'),
        (u'Just for Fun', u'http://db.tidbits.com/feeds/fun.rss'),
        (u'Macs & Mac OS X', u'http://db.tidbits.com/feeds/macs.rss'),
        (u'Media Creation', u'http://db.tidbits.com/feeds/creative.rss'),
        (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'),
        (u'Opinion & Editorial', u'http://db.tidbits.com/feeds/opinion.rss'),
        (u'Support & Problem Solving', u'http://db.tidbits.com/feeds/support.rss'),
        (u'Safe Computing', u'http://db.tidbits.com/feeds/security.rss'),
        (u'Tech News', u'http://db.tidbits.com/feeds/tech.rss'),
        (u'Software Watchlist', u'http://db.tidbits.com/feeds/watchlist.rss'),
    ]
|
@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
# first, check if there is an h3 tag which provides a section name
|
# first, check if there is an h3 tag which provides a section name
|
||||||
stag = divtag.find('h3')
|
stag = divtag.find('h3')
|
||||||
if stag:
|
if stag:
|
||||||
if stag.parent['class'] == 'dynamic':
|
if stag.parent.get('class', '') == 'dynamic':
|
||||||
# a carousel of articles is too complex to extract a section name
|
# a carousel of articles is too complex to extract a section name
|
||||||
# for each article, so we'll just call the section "Carousel"
|
# for each article, so we'll just call the section "Carousel"
|
||||||
section_name = 'Carousel'
|
section_name = 'Carousel'
|
||||||
|
@ -48,7 +48,9 @@ class Resources(Command):
|
|||||||
dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
|
dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
|
||||||
if self.newer(dest, files):
|
if self.newer(dest, files):
|
||||||
self.info('\tCreating builtin_recipes.xml')
|
self.info('\tCreating builtin_recipes.xml')
|
||||||
open(dest, 'wb').write(serialize_builtin_recipes())
|
xml = serialize_builtin_recipes()
|
||||||
|
with open(dest, 'wb') as f:
|
||||||
|
f.write(xml)
|
||||||
|
|
||||||
dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
|
dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
|
||||||
files = []
|
files = []
|
||||||
|
@ -262,7 +262,6 @@ class Region(object):
|
|||||||
max_lines = max(max_lines, len(c))
|
max_lines = max(max_lines, len(c))
|
||||||
return max_lines
|
return max_lines
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_small(self):
|
def is_small(self):
|
||||||
return self.line_count < 3
|
return self.line_count < 3
|
||||||
@ -438,9 +437,8 @@ class Page(object):
|
|||||||
# absorb into a neighboring region (prefer the one with number of cols
|
# absorb into a neighboring region (prefer the one with number of cols
|
||||||
# closer to the avg number of cols in the set, if equal use larger
|
# closer to the avg number of cols in the set, if equal use larger
|
||||||
# region)
|
# region)
|
||||||
# merge contiguous regions that can contain each other
|
|
||||||
'''absorbed = set([])
|
|
||||||
found = True
|
found = True
|
||||||
|
absorbed = set([])
|
||||||
while found:
|
while found:
|
||||||
found = False
|
found = False
|
||||||
for i, region in enumerate(self.regions):
|
for i, region in enumerate(self.regions):
|
||||||
@ -452,10 +450,33 @@ class Page(object):
|
|||||||
regions.append(self.regions[j])
|
regions.append(self.regions[j])
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
prev = None if i == 0 else i-1
|
prev_region = None if i == 0 else i-1
|
||||||
next = j if self.regions[j] not in regions else None
|
next_region = j if self.regions[j] not in regions else None
|
||||||
'''
|
if prev_region is None and next_region is not None:
|
||||||
pass
|
absorb_into = next_region
|
||||||
|
elif next_region is None and prev_region is not None:
|
||||||
|
absorb_into = prev_region
|
||||||
|
elif prev_region is None and next_region is None:
|
||||||
|
if len(regions) > 1:
|
||||||
|
absorb_into = regions[0]
|
||||||
|
regions = regions[1:]
|
||||||
|
else:
|
||||||
|
absorb_into = None
|
||||||
|
else:
|
||||||
|
absorb_into = prev_region
|
||||||
|
if next_region.line_count >= prev_region.line_count:
|
||||||
|
avg_column_count = sum([len(r.columns) for r in
|
||||||
|
regions])/float(len(regions))
|
||||||
|
if next_region.line_count > prev_region.line_count \
|
||||||
|
or abs(avg_column_count - len(prev_region.columns)) \
|
||||||
|
> abs(avg_column_count - len(next_region.columns)):
|
||||||
|
absorb_into = next_region
|
||||||
|
if absorb_into is not None:
|
||||||
|
absorb_into.absorb_region(regions)
|
||||||
|
absorbed.update(regions)
|
||||||
|
i = j
|
||||||
|
for region in absorbed:
|
||||||
|
self.regions.remove(region)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,7 +72,7 @@ class Tokenize:
|
|||||||
return line
|
return line
|
||||||
def __compile_expressions(self):
|
def __compile_expressions(self):
|
||||||
self.__ms_hex_exp = re.compile(r"\\\'(..)")
|
self.__ms_hex_exp = re.compile(r"\\\'(..)")
|
||||||
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
|
self.__utf_exp = re.compile(r"\\u(-?\d{3,6})")
|
||||||
self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
|
self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
|
||||||
self.__par_exp = re.compile(r'\\$')
|
self.__par_exp = re.compile(r'\\$')
|
||||||
self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
|
self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
|
||||||
|
@ -80,7 +80,7 @@
|
|||||||
<widget class="QLabel" name="label_6">
|
<widget class="QLabel" name="label_6">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Regex tips:
|
<string>Regex tips:
|
||||||
- The default regex - \[[\w]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
|
- The default regex - \[[\w ]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
|
||||||
- A regex pattern of a single dot excludes all genre tags, generating no Genre Section</string>
|
- A regex pattern of a single dot excludes all genre tags, generating no Genre Section</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="wordWrap">
|
<property name="wordWrap">
|
||||||
|
@ -57,7 +57,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
|
|||||||
setattr(opts,option, fmt_options[option])
|
setattr(opts,option, fmt_options[option])
|
||||||
|
|
||||||
# Fetch and run the plugin for fmt
|
# Fetch and run the plugin for fmt
|
||||||
|
# Returns 0 if successful, 1 if no catalog built
|
||||||
plugin = plugin_for_catalog_format(fmt)
|
plugin = plugin_for_catalog_format(fmt)
|
||||||
plugin.run(out_file_name, opts, db, notification=notification)
|
return plugin.run(out_file_name, opts, db, notification=notification)
|
||||||
|
|
||||||
|
|
||||||
|
@ -149,7 +149,7 @@ class DeviceManager(Thread):
|
|||||||
possibly_connected_devices.append((device, detected_device))
|
possibly_connected_devices.append((device, detected_device))
|
||||||
if possibly_connected_devices:
|
if possibly_connected_devices:
|
||||||
if not self.do_connect(possibly_connected_devices):
|
if not self.do_connect(possibly_connected_devices):
|
||||||
print 'Connect to device failed, retying in 5 seconds...'
|
print 'Connect to device failed, retrying in 5 seconds...'
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
if not self.do_connect(possibly_connected_devices):
|
if not self.do_connect(possibly_connected_devices):
|
||||||
print 'Device connect failed again, giving up'
|
print 'Device connect failed again, giving up'
|
||||||
|
@ -594,6 +594,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
|||||||
self.rating.setValue(int(book.rating))
|
self.rating.setValue(int(book.rating))
|
||||||
if book.tags:
|
if book.tags:
|
||||||
self.tags.setText(', '.join(book.tags))
|
self.tags.setText(', '.join(book.tags))
|
||||||
|
if book.series is not None:
|
||||||
|
if self.series.text() is None or self.series.text() == '':
|
||||||
|
self.series.setText(book.series)
|
||||||
|
if book.series_index is not None:
|
||||||
|
self.series_index.setValue(book.series_index)
|
||||||
else:
|
else:
|
||||||
error_dialog(self, _('Cannot fetch metadata'),
|
error_dialog(self, _('Cannot fetch metadata'),
|
||||||
_('You must specify at least one of ISBN, Title, '
|
_('You must specify at least one of ISBN, Title, '
|
||||||
|
@ -903,9 +903,13 @@ class OnDeviceSearch(SearchQueryParser):
|
|||||||
locations[i] = q[v]
|
locations[i] = q[v]
|
||||||
for i, r in enumerate(self.model.db):
|
for i, r in enumerate(self.model.db):
|
||||||
for loc in locations:
|
for loc in locations:
|
||||||
if query in loc(r):
|
try:
|
||||||
matches.add(i)
|
if query in loc(r):
|
||||||
break
|
matches.add(i)
|
||||||
|
break
|
||||||
|
except ValueError: # Unicode errors
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
|
|
||||||
|
@ -1394,6 +1394,11 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
||||||
|
|
||||||
def catalog_generated(self, job):
|
def catalog_generated(self, job):
|
||||||
|
if job.result:
|
||||||
|
# Search terms nulled catalog results
|
||||||
|
return error_dialog(self, _('No books found'),
|
||||||
|
_("No books to catalog\nCheck exclude tags"),
|
||||||
|
show=True)
|
||||||
if job.failed:
|
if job.failed:
|
||||||
return self.job_exception(job)
|
return self.job_exception(job)
|
||||||
id = self.library_view.model().add_catalog(job.catalog_file_path, job.catalog_title)
|
id = self.library_view.model().add_catalog(job.catalog_file_path, job.catalog_title)
|
||||||
|
@ -927,8 +927,16 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
for record in data:
|
for record in data:
|
||||||
this_title = {}
|
this_title = {}
|
||||||
|
|
||||||
title = this_title['title'] = self.convertHTMLEntities(record['title'])
|
this_title['title'] = self.convertHTMLEntities(record['title'])
|
||||||
this_title['title_sort'] = self.generateSortTitle(title)
|
if record['series']:
|
||||||
|
this_title['series'] = record['series']
|
||||||
|
this_title['series_index'] = record['series_index']
|
||||||
|
this_title['title'] = self.generateSeriesTitle(this_title)
|
||||||
|
else:
|
||||||
|
this_title['series'] = None
|
||||||
|
this_title['series_index'] = 0.0
|
||||||
|
|
||||||
|
this_title['title_sort'] = self.generateSortTitle(this_title['title'])
|
||||||
if 'authors' in record and len(record['authors']):
|
if 'authors' in record and len(record['authors']):
|
||||||
this_title['author'] = " & ".join(record['authors'])
|
this_title['author'] = " & ".join(record['authors'])
|
||||||
else:
|
else:
|
||||||
@ -984,12 +992,59 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
|
|
||||||
def fetchBooksByAuthor(self):
|
def fetchBooksByAuthor(self):
|
||||||
# Generate a list of titles sorted by author from the database
|
# Generate a list of titles sorted by author from the database
|
||||||
|
def author_compare(x,y):
|
||||||
|
# Return -1 if x<y
|
||||||
|
# Return 0 if x==y
|
||||||
|
# Return 1 if x>y
|
||||||
|
# Different authors - sort by author_sort
|
||||||
|
if x['author_sort'] > y['author_sort']:
|
||||||
|
return 1
|
||||||
|
elif x['author_sort'] < y['author_sort']:
|
||||||
|
return -1
|
||||||
|
else:
|
||||||
|
# Same author
|
||||||
|
if x['series'] != y['series']:
|
||||||
|
# Different series
|
||||||
|
if x['title_sort'].lstrip() > y['title_sort'].lstrip():
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
return -1
|
||||||
|
else:
|
||||||
|
# Same series
|
||||||
|
if x['series'] == y['series']:
|
||||||
|
if float(x['series_index']) > float(y['series_index']):
|
||||||
|
return 1
|
||||||
|
elif float(x['series_index']) < float(y['series_index']):
|
||||||
|
return -1
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
if x['series'] > y['series']:
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
return -1
|
||||||
|
|
||||||
self.updateProgressFullStep("Sorting database")
|
self.updateProgressFullStep("Sorting database")
|
||||||
|
|
||||||
# Sort titles case-insensitive
|
'''
|
||||||
|
# Sort titles case-insensitive, by author
|
||||||
self.booksByAuthor = sorted(self.booksByTitle,
|
self.booksByAuthor = sorted(self.booksByTitle,
|
||||||
key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
|
key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.booksByAuthor = list(self.booksByTitle)
|
||||||
|
self.booksByAuthor.sort(author_compare)
|
||||||
|
|
||||||
|
if False and self.verbose:
|
||||||
|
self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
|
||||||
|
self.opts.log.info(" %-30s %-20s %s" % ('title', 'title_sort','series', 'series_index'))
|
||||||
|
for title in self.booksByAuthor:
|
||||||
|
self.opts.log.info((u" %-30s %-20s %-20s%5s " % \
|
||||||
|
(title['title'][:30],
|
||||||
|
title['series'][:20] if title['series'] else '',
|
||||||
|
title['series_index'],
|
||||||
|
)).encode('utf-8'))
|
||||||
|
raise SystemExit
|
||||||
|
|
||||||
# Build the unique_authors set from existing data
|
# Build the unique_authors set from existing data
|
||||||
authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor]
|
authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor]
|
||||||
@ -1063,7 +1118,17 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
# Insert the book title
|
# Insert the book title
|
||||||
#<p class="title"><a name="<database_id>"></a><em>Book Title</em></p>
|
#<p class="title"><a name="<database_id>"></a><em>Book Title</em></p>
|
||||||
emTag = Tag(soup, "em")
|
emTag = Tag(soup, "em")
|
||||||
emTag.insert(0, NavigableString(escape(title['title'])))
|
if title['series']:
|
||||||
|
# title<br />series series_index
|
||||||
|
brTag = Tag(soup,'br')
|
||||||
|
title_tokens = title['title'].split(': ')
|
||||||
|
emTag.insert(0, NavigableString(title_tokens[1]))
|
||||||
|
emTag.insert(1, brTag)
|
||||||
|
smallTag = Tag(soup,'small')
|
||||||
|
smallTag.insert(0,NavigableString(title_tokens[0]))
|
||||||
|
emTag.insert(2, smallTag)
|
||||||
|
else:
|
||||||
|
emTag.insert(0, NavigableString(escape(title['title'])))
|
||||||
titleTag = body.find(attrs={'class':'title'})
|
titleTag = body.find(attrs={'class':'title'})
|
||||||
titleTag.insert(0,emTag)
|
titleTag.insert(0,emTag)
|
||||||
|
|
||||||
@ -1073,7 +1138,12 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
aTag['href'] = "%s.html#%s" % ("ByAlphaAuthor", self.generateAuthorAnchor(title['author']))
|
aTag['href'] = "%s.html#%s" % ("ByAlphaAuthor", self.generateAuthorAnchor(title['author']))
|
||||||
#aTag.insert(0, escape(title['author']))
|
#aTag.insert(0, escape(title['author']))
|
||||||
aTag.insert(0, title['author'])
|
aTag.insert(0, title['author'])
|
||||||
authorTag.insert(0, NavigableString("by "))
|
|
||||||
|
# Insert READ_SYMBOL
|
||||||
|
if title['read']:
|
||||||
|
authorTag.insert(0, NavigableString(self.READ_SYMBOL + "by "))
|
||||||
|
else:
|
||||||
|
authorTag.insert(0, NavigableString(self.NOT_READ_SYMBOL + "by "))
|
||||||
authorTag.insert(1, aTag)
|
authorTag.insert(1, aTag)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
@ -1085,6 +1155,27 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
tagsTag.insert(0,emTag)
|
tagsTag.insert(0,emTag)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
'''
|
||||||
|
# Insert Series info or remove.
|
||||||
|
seriesTag = body.find(attrs={'class':'series'})
|
||||||
|
if title['series']:
|
||||||
|
# Insert a spacer to match the author indent
|
||||||
|
stc = 0
|
||||||
|
fontTag = Tag(soup,"font")
|
||||||
|
fontTag['style'] = 'color:white;font-size:large'
|
||||||
|
if self.opts.fmt == 'epub':
|
||||||
|
fontTag['style'] += ';opacity: 0.0'
|
||||||
|
fontTag.insert(0, NavigableString("by "))
|
||||||
|
seriesTag.insert(stc, fontTag)
|
||||||
|
stc += 1
|
||||||
|
if float(title['series_index']) - int(title['series_index']):
|
||||||
|
series_str = 'Series: %s [%4.2f]' % (title['series'], title['series_index'])
|
||||||
|
else:
|
||||||
|
series_str = '%s [%d]' % (title['series'], title['series_index'])
|
||||||
|
seriesTag.insert(stc,NavigableString(series_str))
|
||||||
|
else:
|
||||||
|
seriesTag.extract()
|
||||||
|
'''
|
||||||
# Insert linked genres
|
# Insert linked genres
|
||||||
if 'tags' in title:
|
if 'tags' in title:
|
||||||
tagsTag = body.find(attrs={'class':'tags'})
|
tagsTag = body.find(attrs={'class':'tags'})
|
||||||
@ -1118,7 +1209,12 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
else:
|
else:
|
||||||
imgTag['src'] = "../images/thumbnail_default.jpg"
|
imgTag['src'] = "../images/thumbnail_default.jpg"
|
||||||
imgTag['alt'] = "cover"
|
imgTag['alt'] = "cover"
|
||||||
imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT)
|
|
||||||
|
# Tweak image size if we're building for Sony, not sure why this is needed
|
||||||
|
if self.opts.fmt == 'epub' and self.opts.output_profile.startswith("sony"):
|
||||||
|
imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH * 2, self.THUMB_HEIGHT * 2)
|
||||||
|
else:
|
||||||
|
imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT)
|
||||||
thumbnailTag = body.find(attrs={'class':'thumbnail'})
|
thumbnailTag = body.find(attrs={'class':'thumbnail'})
|
||||||
thumbnailTag.insert(0,imgTag)
|
thumbnailTag.insert(0,imgTag)
|
||||||
|
|
||||||
@ -1310,8 +1406,9 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
dtc = 0
|
dtc = 0
|
||||||
current_letter = ""
|
current_letter = ""
|
||||||
current_author = ""
|
current_author = ""
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Loop through books_by_author
|
# Loop through booksByAuthor
|
||||||
book_count = 0
|
book_count = 0
|
||||||
for book in self.booksByAuthor:
|
for book in self.booksByAuthor:
|
||||||
book_count += 1
|
book_count += 1
|
||||||
@ -1349,11 +1446,23 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pAuthorTag)
|
divTag.insert(dtc,pAuthorTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
|
|
||||||
|
# Check for series
|
||||||
|
if book['series'] and book['series'] != current_series:
|
||||||
|
# Start a new series
|
||||||
|
current_series = book['series']
|
||||||
|
pSeriesTag = Tag(soup,'p')
|
||||||
|
pSeriesTag['class'] = "series"
|
||||||
|
pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + book['series']))
|
||||||
|
divTag.insert(dtc,pSeriesTag)
|
||||||
|
dtc += 1
|
||||||
|
if current_series and not book['series']:
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Add books
|
# Add books
|
||||||
pBookTag = Tag(soup, "p")
|
pBookTag = Tag(soup, "p")
|
||||||
ptc = 0
|
ptc = 0
|
||||||
|
|
||||||
# Prefix book with read/unread symbol
|
# book with read/unread symbol
|
||||||
if book['read']:
|
if book['read']:
|
||||||
# check mark
|
# check mark
|
||||||
pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
|
pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
|
||||||
@ -1367,7 +1476,11 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
|
|
||||||
aTag = Tag(soup, "a")
|
aTag = Tag(soup, "a")
|
||||||
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
||||||
aTag.insert(0,escape(book['title']))
|
# Use series, series index if avail else just title
|
||||||
|
if current_series:
|
||||||
|
aTag.insert(0,escape(book['title'][len(book['series'])+1:]))
|
||||||
|
else:
|
||||||
|
aTag.insert(0,escape(book['title']))
|
||||||
pBookTag.insert(ptc, aTag)
|
pBookTag.insert(ptc, aTag)
|
||||||
ptc += 1
|
ptc += 1
|
||||||
|
|
||||||
@ -1419,6 +1532,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pIndexTag)
|
divTag.insert(dtc,pIndexTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
current_author = None
|
current_author = None
|
||||||
|
current_series = None
|
||||||
|
|
||||||
for new_entry in this_months_list:
|
for new_entry in this_months_list:
|
||||||
if new_entry['author'] != current_author:
|
if new_entry['author'] != current_author:
|
||||||
@ -1435,6 +1549,18 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pAuthorTag)
|
divTag.insert(dtc,pAuthorTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
|
|
||||||
|
# Check for series
|
||||||
|
if new_entry['series'] and new_entry['series'] != current_series:
|
||||||
|
# Start a new series
|
||||||
|
current_series = new_entry['series']
|
||||||
|
pSeriesTag = Tag(soup,'p')
|
||||||
|
pSeriesTag['class'] = "series"
|
||||||
|
pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + new_entry['series']))
|
||||||
|
divTag.insert(dtc,pSeriesTag)
|
||||||
|
dtc += 1
|
||||||
|
if current_series and not new_entry['series']:
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Add books
|
# Add books
|
||||||
pBookTag = Tag(soup, "p")
|
pBookTag = Tag(soup, "p")
|
||||||
ptc = 0
|
ptc = 0
|
||||||
@ -1453,7 +1579,10 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
|
|
||||||
aTag = Tag(soup, "a")
|
aTag = Tag(soup, "a")
|
||||||
aTag['href'] = "book_%d.html" % (int(float(new_entry['id'])))
|
aTag['href'] = "book_%d.html" % (int(float(new_entry['id'])))
|
||||||
aTag.insert(0,escape(new_entry['title']))
|
if current_series:
|
||||||
|
aTag.insert(0,escape(new_entry['title'][len(new_entry['series'])+1:]))
|
||||||
|
else:
|
||||||
|
aTag.insert(0,escape(new_entry['title']))
|
||||||
pBookTag.insert(ptc, aTag)
|
pBookTag.insert(ptc, aTag)
|
||||||
ptc += 1
|
ptc += 1
|
||||||
|
|
||||||
@ -1554,6 +1683,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
this_book['author_sort'] = book['author_sort']
|
this_book['author_sort'] = book['author_sort']
|
||||||
this_book['read'] = book['read']
|
this_book['read'] = book['read']
|
||||||
this_book['id'] = book['id']
|
this_book['id'] = book['id']
|
||||||
|
this_book['series'] = book['series']
|
||||||
normalized_tag = self.genre_tags_dict[friendly_tag]
|
normalized_tag = self.genre_tags_dict[friendly_tag]
|
||||||
genre_tag_list = [key for genre in genre_list for key in genre]
|
genre_tag_list = [key for genre in genre_list for key in genre]
|
||||||
if normalized_tag in genre_tag_list:
|
if normalized_tag in genre_tag_list:
|
||||||
@ -1579,7 +1709,9 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
|
|
||||||
for genre in genre_list:
|
for genre in genre_list:
|
||||||
for key in genre:
|
for key in genre:
|
||||||
self.opts.log.info(" %s: %d titles" % (key, len(genre[key])))
|
self.opts.log.info(" %s: %d %s" % (self.getFriendlyGenreTag(key),
|
||||||
|
len(genre[key]),
|
||||||
|
'titles' if len(genre[key]) > 1 else 'title'))
|
||||||
|
|
||||||
# Write the results
|
# Write the results
|
||||||
# genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...]
|
# genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...]
|
||||||
@ -1786,7 +1918,9 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
mtc += 1
|
mtc += 1
|
||||||
|
|
||||||
# HTML files - add books to manifest and spine
|
# HTML files - add books to manifest and spine
|
||||||
for book in self.booksByTitle:
|
sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \
|
||||||
|
else self.booksByTitle
|
||||||
|
for book in sort_descriptions_by:
|
||||||
# manifest
|
# manifest
|
||||||
itemTag = Tag(soup, "item")
|
itemTag = Tag(soup, "item")
|
||||||
itemTag['href'] = "content/book_%d.html" % int(book['id'])
|
itemTag['href'] = "content/book_%d.html" % int(book['id'])
|
||||||
@ -1912,7 +2046,9 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
nptc += 1
|
nptc += 1
|
||||||
|
|
||||||
# Loop over the titles
|
# Loop over the titles
|
||||||
for book in self.booksByTitle:
|
sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \
|
||||||
|
else self.booksByTitle
|
||||||
|
for book in sort_descriptions_by:
|
||||||
navPointVolumeTag = Tag(ncx_soup, 'navPoint')
|
navPointVolumeTag = Tag(ncx_soup, 'navPoint')
|
||||||
navPointVolumeTag['class'] = "article"
|
navPointVolumeTag['class'] = "article"
|
||||||
navPointVolumeTag['id'] = "book%dID" % int(book['id'])
|
navPointVolumeTag['id'] = "book%dID" % int(book['id'])
|
||||||
@ -1920,7 +2056,11 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
self.playOrder += 1
|
self.playOrder += 1
|
||||||
navLabelTag = Tag(ncx_soup, "navLabel")
|
navLabelTag = Tag(ncx_soup, "navLabel")
|
||||||
textTag = Tag(ncx_soup, "text")
|
textTag = Tag(ncx_soup, "text")
|
||||||
textTag.insert(0, NavigableString(self.formatNCXText(book['title'])))
|
if book['series']:
|
||||||
|
tokens = book['title'].split(': ')
|
||||||
|
textTag.insert(0, NavigableString(self.formatNCXText('%s (%s)' % (tokens[1], tokens[0]))))
|
||||||
|
else:
|
||||||
|
textTag.insert(0, NavigableString(self.formatNCXText(book['title'])))
|
||||||
navLabelTag.insert(0,textTag)
|
navLabelTag.insert(0,textTag)
|
||||||
navPointVolumeTag.insert(0,navLabelTag)
|
navPointVolumeTag.insert(0,navLabelTag)
|
||||||
|
|
||||||
@ -2426,15 +2566,25 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
else:
|
else:
|
||||||
yield tag
|
yield tag
|
||||||
|
|
||||||
self.opts.log.info(u' %d available genre tags in database (exclude_genre: %s):' % \
|
self.opts.log.info(u' %d genre tags in database (excluding genres matching %s):' % \
|
||||||
(len(genre_tags_dict), self.opts.exclude_genre))
|
(len(genre_tags_dict), self.opts.exclude_genre))
|
||||||
|
|
||||||
# Display friendly/normalized genres
|
# Display friendly/normalized genres
|
||||||
# friendly => normalized
|
# friendly => normalized
|
||||||
sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
|
if False:
|
||||||
|
sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
|
||||||
for tag in next_tag(sorted_tags):
|
for tag in next_tag(sorted_tags):
|
||||||
self.opts.log(u' %s' % tag)
|
self.opts.log(u' %s' % tag)
|
||||||
|
else:
|
||||||
|
sorted_tags = ['%s' % (key) for key in sorted(genre_tags_dict.keys())]
|
||||||
|
out_str = ''
|
||||||
|
line_break = 70
|
||||||
|
for tag in next_tag(sorted_tags):
|
||||||
|
out_str += tag
|
||||||
|
if len(out_str) >= line_break:
|
||||||
|
self.opts.log.info(' %s' % out_str)
|
||||||
|
out_str = ''
|
||||||
|
self.opts.log.info(' %s' % out_str)
|
||||||
|
|
||||||
return genre_tags_dict
|
return genre_tags_dict
|
||||||
|
|
||||||
@ -2474,19 +2624,15 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
body.insert(btc,aTag)
|
body.insert(btc,aTag)
|
||||||
btc += 1
|
btc += 1
|
||||||
|
|
||||||
# Find the first instance of friendly_tag matching genre
|
|
||||||
for friendly_tag in self.genre_tags_dict:
|
|
||||||
if self.genre_tags_dict[friendly_tag] == genre:
|
|
||||||
break
|
|
||||||
|
|
||||||
titleTag = body.find(attrs={'class':'title'})
|
titleTag = body.find(attrs={'class':'title'})
|
||||||
titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(friendly_tag)))
|
titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(self.getFriendlyGenreTag(genre))))
|
||||||
|
|
||||||
# Insert the books by author list
|
# Insert the books by author list
|
||||||
divTag = body.find(attrs={'class':'authors'})
|
divTag = body.find(attrs={'class':'authors'})
|
||||||
dtc = 0
|
dtc = 0
|
||||||
|
|
||||||
current_author = ''
|
current_author = ''
|
||||||
|
current_series = None
|
||||||
for book in books:
|
for book in books:
|
||||||
if book['author'] != current_author:
|
if book['author'] != current_author:
|
||||||
# Start a new author with link
|
# Start a new author with link
|
||||||
@ -2502,6 +2648,19 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
divTag.insert(dtc,pAuthorTag)
|
divTag.insert(dtc,pAuthorTag)
|
||||||
dtc += 1
|
dtc += 1
|
||||||
|
|
||||||
|
# Check for series
|
||||||
|
if book['series'] and book['series'] != current_series:
|
||||||
|
# Start a new series
|
||||||
|
current_series = book['series']
|
||||||
|
pSeriesTag = Tag(soup,'p')
|
||||||
|
pSeriesTag['class'] = "series"
|
||||||
|
pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + book['series']))
|
||||||
|
divTag.insert(dtc,pSeriesTag)
|
||||||
|
dtc += 1
|
||||||
|
|
||||||
|
if current_series and not book['series']:
|
||||||
|
current_series = None
|
||||||
|
|
||||||
# Add books
|
# Add books
|
||||||
pBookTag = Tag(soup, "p")
|
pBookTag = Tag(soup, "p")
|
||||||
ptc = 0
|
ptc = 0
|
||||||
@ -2518,7 +2677,11 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
# Add the book title
|
# Add the book title
|
||||||
aTag = Tag(soup, "a")
|
aTag = Tag(soup, "a")
|
||||||
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
aTag['href'] = "book_%d.html" % (int(float(book['id'])))
|
||||||
aTag.insert(0,escape(book['title']))
|
# Use series, series index if avail else just title
|
||||||
|
if current_series:
|
||||||
|
aTag.insert(0,escape(book['title'][len(book['series'])+1:]))
|
||||||
|
else:
|
||||||
|
aTag.insert(0,escape(book['title']))
|
||||||
pBookTag.insert(ptc, aTag)
|
pBookTag.insert(ptc, aTag)
|
||||||
ptc += 1
|
ptc += 1
|
||||||
|
|
||||||
@ -2553,6 +2716,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
<p class="title"></p>
|
<p class="title"></p>
|
||||||
{0}
|
{0}
|
||||||
<p class="author"></p>
|
<p class="author"></p>
|
||||||
|
<!--p class="series"></p-->
|
||||||
<p class="tags"> </p>
|
<p class="tags"> </p>
|
||||||
<table width="100%" border="0">
|
<table width="100%" border="0">
|
||||||
<tr>
|
<tr>
|
||||||
@ -2678,6 +2842,17 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
draw.text((left, top), text, fill=(0,0,0), font=font)
|
draw.text((left, top), text, fill=(0,0,0), font=font)
|
||||||
img.save(open(out_path, 'wb'), 'GIF')
|
img.save(open(out_path, 'wb'), 'GIF')
|
||||||
|
|
||||||
|
def generateSeriesTitle(self, title):
|
||||||
|
if float(title['series_index']) - int(title['series_index']):
|
||||||
|
series_title = '%s %4.2f: %s' % (title['series'],
|
||||||
|
title['series_index'],
|
||||||
|
title['title'])
|
||||||
|
else:
|
||||||
|
series_title = '%s %d: %s' % (title['series'],
|
||||||
|
title['series_index'],
|
||||||
|
title['title'])
|
||||||
|
return series_title
|
||||||
|
|
||||||
def generateShortDescription(self, description):
|
def generateShortDescription(self, description):
|
||||||
# Truncate the description to description_clip, on word boundaries if necessary
|
# Truncate the description to description_clip, on word boundaries if necessary
|
||||||
if not description:
|
if not description:
|
||||||
@ -2775,33 +2950,115 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
else:
|
else:
|
||||||
return char
|
return char
|
||||||
|
|
||||||
|
def getFriendlyGenreTag(self, genre):
|
||||||
|
# Find the first instance of friendly_tag matching genre
|
||||||
|
for friendly_tag in self.genre_tags_dict:
|
||||||
|
if self.genre_tags_dict[friendly_tag] == genre:
|
||||||
|
return friendly_tag
|
||||||
|
|
||||||
def markdownComments(self, comments):
|
def markdownComments(self, comments):
|
||||||
''' Convert random comment text to normalized, xml-legal block of <p>s'''
|
'''
|
||||||
# reformat illegal xml
|
Convert random comment text to normalized, xml-legal block of <p>s
|
||||||
desc = prepare_string_for_xml(comments)
|
'plain text' returns as
|
||||||
|
<p>plain text</p>
|
||||||
|
|
||||||
# normalize <br/> tags
|
'plain text with <i>minimal</i> <b>markup</b>' returns as
|
||||||
desc = re.sub(r'<br[/]{0,1}>', '<br/>', desc)
|
<p>plain text with <i>minimal</i> <b>markup</b></p>
|
||||||
|
|
||||||
# tokenize double line breaks
|
'<p>pre-formatted text</p> returns untouched
|
||||||
desc = comments.replace('\r', '')
|
|
||||||
tokens = comments.split('\n\n')
|
|
||||||
|
|
||||||
soup = BeautifulSoup()
|
'A line of text\n\nFollowed by a line of text' returns as
|
||||||
ptc = 0
|
<p>A line of text</p>
|
||||||
for token in tokens:
|
<p>Followed by a line of text</p>
|
||||||
pTag = Tag(soup, 'p')
|
|
||||||
pTag.insert(0,token)
|
'A line of text.\nA second line of text.\rA third line of text' returns as
|
||||||
soup.insert(ptc, pTag)
|
<p>A line of text.<br />A second line of text.<br />A third line of text.</p>
|
||||||
ptc += 1
|
|
||||||
return soup.renderContents(encoding=None)
|
'...end of a paragraph.Somehow the break was lost...' returns as
|
||||||
|
<p>...end of a paragraph.</p>
|
||||||
|
<p>Somehow the break was lost...</p>
|
||||||
|
|
||||||
|
Deprecated HTML returns as HTML via BeautifulSoup()
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Explode lost CRs to \n\n
|
||||||
|
# Hackish - ignoring sentences ending or beginning in numbers to avoid
|
||||||
|
# confusion with decimal points.
|
||||||
|
for lost_cr in re.finditer('([a-z])([\.\?!])([A-Z])',comments):
|
||||||
|
comments = comments.replace(lost_cr.group(),
|
||||||
|
'%s%s\n\n%s' % (lost_cr.group(1),
|
||||||
|
lost_cr.group(2),
|
||||||
|
lost_cr.group(3)))
|
||||||
|
|
||||||
|
# Convert \n\n to <p>s
|
||||||
|
if re.search('\n\n', comments):
|
||||||
|
soup = BeautifulSoup()
|
||||||
|
split_ps = comments.split('\n\n')
|
||||||
|
tsc = 0
|
||||||
|
for p in split_ps:
|
||||||
|
pTag = Tag(soup,'p')
|
||||||
|
pTag.insert(0,p)
|
||||||
|
soup.insert(tsc,pTag)
|
||||||
|
tsc += 1
|
||||||
|
comments = soup.renderContents()
|
||||||
|
|
||||||
|
# Convert solo returns to <br />
|
||||||
|
comments = re.sub('[\r\n]','<br />', comments)
|
||||||
|
|
||||||
|
soup = BeautifulSoup(comments)
|
||||||
|
|
||||||
|
result = BeautifulSoup()
|
||||||
|
rtc = 0
|
||||||
|
open_pTag = False
|
||||||
|
|
||||||
|
all_tokens = list(soup.contents)
|
||||||
|
for token in all_tokens:
|
||||||
|
if type(token) is NavigableString:
|
||||||
|
if not open_pTag:
|
||||||
|
pTag = Tag(result,'p')
|
||||||
|
open_pTag = True
|
||||||
|
ptc = 0
|
||||||
|
pTag.insert(ptc,prepare_string_for_xml(token))
|
||||||
|
ptc += 1
|
||||||
|
|
||||||
|
elif token.name in ['br','b','i']:
|
||||||
|
if not open_pTag:
|
||||||
|
pTag = Tag(result,'p')
|
||||||
|
open_pTag = True
|
||||||
|
ptc = 0
|
||||||
|
pTag.insert(ptc, token)
|
||||||
|
ptc += 1
|
||||||
|
|
||||||
|
else:
|
||||||
|
if open_pTag:
|
||||||
|
result.insert(rtc, pTag)
|
||||||
|
rtc += 1
|
||||||
|
open_pTag = False
|
||||||
|
ptc = 0
|
||||||
|
# Clean up NavigableStrings for xml
|
||||||
|
sub_tokens = list(token.contents)
|
||||||
|
for sub_token in sub_tokens:
|
||||||
|
if type(sub_token) is NavigableString:
|
||||||
|
sub_token.replaceWith(prepare_string_for_xml(sub_token))
|
||||||
|
result.insert(rtc, token)
|
||||||
|
rtc += 1
|
||||||
|
|
||||||
|
if open_pTag:
|
||||||
|
result.insert(rtc, pTag)
|
||||||
|
|
||||||
|
paras = result.findAll('p')
|
||||||
|
for p in paras:
|
||||||
|
p['class'] = 'description'
|
||||||
|
|
||||||
|
return result.renderContents(encoding=None)
|
||||||
|
|
||||||
def processSpecialTags(self, tags, this_title, opts):
|
def processSpecialTags(self, tags, this_title, opts):
|
||||||
tag_list = []
|
tag_list = []
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
tag = self.convertHTMLEntities(tag)
|
tag = self.convertHTMLEntities(tag)
|
||||||
if tag.startswith(opts.note_tag):
|
if tag.startswith(opts.note_tag):
|
||||||
this_title['notes'] = tag[1:]
|
this_title['notes'] = tag[len(self.opts.note_tag):]
|
||||||
elif tag == opts.read_tag:
|
elif tag == opts.read_tag:
|
||||||
this_title['read'] = True
|
this_title['read'] = True
|
||||||
elif re.search(opts.exclude_genre, tag):
|
elif re.search(opts.exclude_genre, tag):
|
||||||
@ -2847,6 +3104,8 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
opts.basename = "Catalog"
|
opts.basename = "Catalog"
|
||||||
opts.plugin_path = self.plugin_path
|
opts.plugin_path = self.plugin_path
|
||||||
opts.cli_environment = not hasattr(opts,'sync')
|
opts.cli_environment = not hasattr(opts,'sync')
|
||||||
|
# GwR *** hardwired to sort by author, could be an option if passed in opts
|
||||||
|
opts.sort_descriptions_by_author = True
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
opts_dict = vars(opts)
|
opts_dict = vars(opts)
|
||||||
@ -2855,15 +3114,30 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
'CLI' if opts.cli_environment else 'GUI'))
|
'CLI' if opts.cli_environment else 'GUI'))
|
||||||
if opts_dict['ids']:
|
if opts_dict['ids']:
|
||||||
log(" Book count: %d" % len(opts_dict['ids']))
|
log(" Book count: %d" % len(opts_dict['ids']))
|
||||||
|
|
||||||
|
sections_list = ['Descriptions','Authors']
|
||||||
|
if opts.generate_titles:
|
||||||
|
sections_list.append('Titles')
|
||||||
|
if opts.generate_recently_added:
|
||||||
|
sections_list.append('Recently Added')
|
||||||
|
if not opts.exclude_genre.strip() == '.':
|
||||||
|
sections_list.append('Genres')
|
||||||
|
log(u"Creating Sections for %s" % ', '.join(sections_list))
|
||||||
|
|
||||||
|
# If exclude_genre is blank, assume user wants all genre tags included
|
||||||
|
if opts.exclude_genre.strip() == '':
|
||||||
|
opts.exclude_genre = '\[^.\]'
|
||||||
|
log(" converting empty exclude_genre to '\[^.\]'")
|
||||||
|
|
||||||
# Display opts
|
# Display opts
|
||||||
keys = opts_dict.keys()
|
keys = opts_dict.keys()
|
||||||
keys.sort()
|
keys.sort()
|
||||||
log(" opts:")
|
log(" opts:")
|
||||||
|
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if key in ['catalog_title','exclude_genre','exclude_tags','generate_titles',
|
if key in ['catalog_title','exclude_genre','exclude_tags',
|
||||||
'generate_recently_added','note_tag','numbers_as_text','read_tag',
|
'note_tag','numbers_as_text','read_tag',
|
||||||
'search_text','sort_by','sync']:
|
'search_text','sort_by','sort_descriptions_by_author','sync']:
|
||||||
log(" %s: %s" % (key, opts_dict[key]))
|
log(" %s: %s" % (key, opts_dict[key]))
|
||||||
|
|
||||||
# Launch the Catalog builder
|
# Launch the Catalog builder
|
||||||
|
@ -62,7 +62,7 @@ How do I convert my file containing non-English characters, or smart quotes?
|
|||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
There are two aspects to this problem:
|
There are two aspects to this problem:
|
||||||
1. Knowing the encoding of the source file: |app| tries to guess what character encoding your source files use, but often, this is impossible, so you need to tell it what encoding to use. This can be done in the GUI via the :guilabel:`Input character encoding` field in the :guilabel:`Look & Feel` section. The command-line tools all have an :option:`--input-encoding` option.
|
1. Knowing the encoding of the source file: |app| tries to guess what character encoding your source files use, but often, this is impossible, so you need to tell it what encoding to use. This can be done in the GUI via the :guilabel:`Input character encoding` field in the :guilabel:`Look & Feel` section. The command-line tools all have an :option:`--input-encoding` option.
|
||||||
2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first.
|
2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this, go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first. Note that when converting HTML files, leave the input encoding setting mentioned above blank. This is because the HTML2Zip plugin automatically converts the HTML files to a standard encoding (UTF-8).
|
||||||
3. Embedding fonts: If you are generating an LRF file to read on your SONY Reader, you are limited by the fact that the Reader only supports a few non-English characters in the fonts it comes pre-loaded with. You can work around this problem by embedding a Unicode-aware font that supports the character set your file uses into the LRF file. You should embed at least a serif and a sans-serif font. Be aware that embedding fonts significantly slows down page-turn speed on the reader.
|
3. Embedding fonts: If you are generating an LRF file to read on your SONY Reader, you are limited by the fact that the Reader only supports a few non-English characters in the fonts it comes pre-loaded with. You can work around this problem by embedding a Unicode-aware font that supports the character set your file uses into the LRF file. You should embed at least a serif and a sans-serif font. Be aware that embedding fonts significantly slows down page-turn speed on the reader.
|
||||||
|
|
||||||
|
|
||||||
|
@ -615,10 +615,12 @@ class BasicNewsRecipe(Recipe):
|
|||||||
del o['onload']
|
del o['onload']
|
||||||
|
|
||||||
for script in list(soup.findAll('noscript')):
|
for script in list(soup.findAll('noscript')):
|
||||||
script.extract()
|
script.extract()
|
||||||
for attr in self.remove_attributes:
|
for attr in self.remove_attributes:
|
||||||
for x in soup.findAll(attrs={attr:True}):
|
for x in soup.findAll(attrs={attr:True}):
|
||||||
del x[attr]
|
del x[attr]
|
||||||
|
for base in list(soup.findAll('base')):
|
||||||
|
base.extract()
|
||||||
return self.postprocess_html(soup, first_fetch)
|
return self.postprocess_html(soup, first_fetch)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user