Sync to trunk.
BIN
resources/images/news/7seri.png
Normal file
After Width: | Height: | Size: 249 B |
BIN
resources/images/news/adevarul.png
Normal file
After Width: | Height: | Size: 401 B |
BIN
resources/images/news/aventurilapescuit.png
Normal file
After Width: | Height: | Size: 627 B |
BIN
resources/images/news/capital.png
Normal file
After Width: | Height: | Size: 617 B |
BIN
resources/images/news/catavencu.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
resources/images/news/chipro.png
Normal file
After Width: | Height: | Size: 181 B |
BIN
resources/images/news/csid.png
Normal file
After Width: | Height: | Size: 340 B |
BIN
resources/images/news/curierulnational.png
Normal file
After Width: | Height: | Size: 1.3 KiB |
BIN
resources/images/news/descopera.png
Normal file
After Width: | Height: | Size: 686 B |
BIN
resources/images/news/ecuisine.png
Normal file
After Width: | Height: | Size: 501 B |
BIN
resources/images/news/egirl.png
Normal file
After Width: | Height: | Size: 507 B |
BIN
resources/images/news/fhmro.png
Normal file
After Width: | Height: | Size: 836 B |
BIN
resources/images/news/gandul.png
Normal file
After Width: | Height: | Size: 527 B |
BIN
resources/images/news/go4it.png
Normal file
After Width: | Height: | Size: 827 B |
BIN
resources/images/news/gsp.png
Normal file
After Width: | Height: | Size: 367 B |
BIN
resources/images/news/hotcity.png
Normal file
After Width: | Height: | Size: 722 B |
BIN
resources/images/news/hotnews.png
Normal file
After Width: | Height: | Size: 722 B |
BIN
resources/images/news/intrefete.png
Normal file
After Width: | Height: | Size: 411 B |
BIN
resources/images/news/jurnalulnational.png
Normal file
After Width: | Height: | Size: 863 B |
BIN
resources/images/news/kudika.png
Normal file
After Width: | Height: | Size: 432 B |
BIN
resources/images/news/mediafax.png
Normal file
After Width: | Height: | Size: 657 B |
BIN
resources/images/news/moneyro.png
Normal file
After Width: | Height: | Size: 219 B |
BIN
resources/images/news/nationalgeoro.png
Normal file
After Width: | Height: | Size: 123 B |
BIN
resources/images/news/prosport.png
Normal file
After Width: | Height: | Size: 272 B |
BIN
resources/images/news/realitatea.png
Normal file
After Width: | Height: | Size: 4.0 KiB |
BIN
resources/images/news/romanialibera.png
Normal file
After Width: | Height: | Size: 222 B |
BIN
resources/images/news/sfin.png
Normal file
After Width: | Height: | Size: 229 B |
BIN
resources/images/news/standardmoney.png
Normal file
After Width: | Height: | Size: 510 B |
BIN
resources/images/news/superbebe.png
Normal file
After Width: | Height: | Size: 307 B |
BIN
resources/images/news/tabu.png
Normal file
After Width: | Height: | Size: 441 B |
BIN
resources/images/news/unica.png
Normal file
After Width: | Height: | Size: 327 B |
BIN
resources/images/news/ziarulfinanciar.png
Normal file
After Width: | Height: | Size: 1.9 KiB |
BIN
resources/images/tb_folder.png
Normal file
After Width: | Height: | Size: 6.3 KiB |
51
resources/recipes/7seri.recipe
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
sapteseri.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class SapteSeri(BasicNewsRecipe):
    """Calibre news recipe for sapteseri.ro.

    Pulls the "Ce se intampla azi in Bucuresti" feed and keeps only the
    title and body containers of each article page.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Sapte Seri'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Sapte Seri'
    publisher = u'Sapte Seri'
    category = 'Ziare,Oras,Distractie,Fun'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.sapteseri.ro/Images/logo.jpg'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Elements selected as article content.
    keep_only_tags = [
        dict(name='h1', attrs={'id': 'title'}),
        dict(name='div', attrs={'class': 'mt10 mb10'}),
        dict(name='div', attrs={'class': 'mb20 mt10'}),
        dict(name='div', attrs={'class': 'mt5 mb20'}),
    ]

    # Elements stripped from the selected content.
    remove_tags = [
        dict(name='div', attrs={'id': ['entityimgworking']}),
    ]

    feeds = [
        (
            u'Ce se intampla azi in Bucuresti',
            u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/',
        ),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
51
resources/recipes/aventurilapescuit.recipe
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
aventurilapescuit.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AventuriLaPescuit(BasicNewsRecipe):
    """Calibre news recipe for aventurilapescuit.ro (single RSS feed)."""

    # --- Publication metadata -------------------------------------------
    title = u'Aventuri La Pescuit'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Aventuri La Pescuit'
    publisher = 'Aventuri La Pescuit'
    category = 'Ziare,Pescuit,Hobby'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The article lives in a single container.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'Article'}),
    ]

    # Elements stripped from the selected content.
    remove_tags = [
        dict(name='div', attrs={'class': ['right option']}),
        dict(name='iframe', attrs={'scrolling': ['no']}),
    ]

    # The same non-scrolling iframe is also used as the cut-off marker.
    remove_tags_after = [
        dict(name='iframe', attrs={'scrolling': ['no']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.aventurilapescuit.ro/sections/rssread/1'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
52
resources/recipes/chipro.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
chip.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ChipRo(BasicNewsRecipe):
    """Calibre news recipe for chip.ro (Chip Online)."""

    # --- Publication metadata -------------------------------------------
    title = u'Chip Online'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Chip Online'
    publisher = 'Chip Online'
    category = 'Ziare,Reviste,IT'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.chip.ro/images/logo.png'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Heading, byline and body are the elements selected as content.
    keep_only_tags = [
        dict(name='h2', attrs={'class': 'contentheading clearfix'}),
        dict(name='span', attrs={'class': 'createby'}),
        dict(name='div', attrs={'class': 'article-content'}),
    ]

    # Share button, aligned wrapper divs and pagination are stripped.
    remove_tags = [
        dict(name='div', attrs={'class': ['sharemecompactbutton']}),
        dict(name='div', attrs={'align': ['left']}),
        dict(name='div', attrs={'align': ['center']}),
        dict(name='th', attrs={'class': ['pagenav_prev']}),
        dict(name='table', attrs={'class': ['pagenav']}),
    ]

    feeds = [
        (
            u'Feeds',
            u'http://www.chip.ro/index.php?option=com_ninjarsssyndicator&feed_id=9&format=raw',
        ),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
52
resources/recipes/csid.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
csid.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class CSID(BasicNewsRecipe):
    """Calibre news recipe for csid.ro ("Ce se intampla doctore?")."""

    # --- Publication metadata -------------------------------------------
    title = u'Ce se \u00eent\u00e2mpl\u0103 doctore?'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Ce se \u00eent\u00e2mpl\u0103 doctore?'
    publisher = 'CSID'
    category = 'Ziare,Reviste,Femei,Health,Beauty'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.csid.ro/images/default/csid.gif'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The article lives in a single container.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'content floatleft'}),
    ]

    # Link and tag blocks are stripped from the selected content.
    remove_tags = [
        dict(name='div', attrs={'id': ['article_links']}),
        dict(name='div', attrs={'id': ['tags']}),
        dict(name='p', attrs={'id': ['tags']}),
    ]

    # The tags paragraph is also used as the cut-off marker.
    remove_tags_after = [
        dict(name='p', attrs={'id': ['tags']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.csid.ro/rss/'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
54
resources/recipes/curierulnational.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
curierulnational.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class CurierulNal(BasicNewsRecipe):
    """Calibre news recipe for curierulnational.ro (Curierul National)."""

    # --- Publication metadata -------------------------------------------
    title = u'Curierul Na\u0163ional'
    __author__ = u'Silviu Cotoar\u0103'
    # NOTE(review): no description is supplied, so the 'comments' entry in
    # conversion_options below is an empty string.
    description = ''
    # Must be a unicode literal: in a plain (byte) string literal on
    # Python 2 the sequence \u0163 is not an escape and would be emitted
    # as six literal characters instead of the intended letter.
    publisher = u'Curierul Na\u0163ional'
    category = 'Ziare,Stiri'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.curierulnational.ro/logo.gif'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Main column plus the placeholder image are selected as content.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'col1'}),
        dict(name='img', attrs={'id': 'placeholder'}),
    ]

    # Related-article links, text-size widget, rating and comments are stripped.
    remove_tags = [
        dict(name='p', attrs={'id': ['alteArticole']}),
        dict(name='div', attrs={'id': ['textSize']}),
        dict(name='ul', attrs={'class': ['unit-rating']}),
        dict(name='div', attrs={'id': ['comments']}),
    ]

    # The rating list is also used as the cut-off marker.
    remove_tags_after = [
        dict(name='ul', attrs={'class': 'unit-rating'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.curierulnational.ro/feed.xml'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
57
resources/recipes/descopera.recipe
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
descopera.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Descopera(BasicNewsRecipe):
    """Calibre news recipe for descopera.ro."""

    # --- Publication metadata -------------------------------------------
    title = u'Descoper\u0103'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'E lumea ta'
    publisher = 'Descopera'
    category = 'Ziare,Reviste,Descopera'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.descopera.ro/images/header_images/logo.gif'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The first two selectors match on the exact inline ``style`` attribute
    # text, so those strings must stay character-for-character as written.
    keep_only_tags = [
        dict(name='h1', attrs={'style': 'font-family: Arial,Helvetica,sans-serif; font-size: 18px; color: rgb(51, 51, 51); font-weight: bold; margin: 10px 0pt; clear: both; float: left;width: 610px;'}),
        dict(name='div', attrs={'style': 'margin-right: 15px; margin-bottom: 15px; float: left;'}),
        dict(name='p', attrs={'id': 'itemDescription'}),
        dict(name='div', attrs={'id': 'itemBody'}),
    ]

    # Tool, share, category and comment blocks are stripped.
    remove_tags = [
        dict(name='div', attrs={'class': ['tools']}),
        dict(name='div', attrs={'class': ['share']}),
        dict(name='div', attrs={'class': ['category']}),
        dict(name='div', attrs={'id': ['comments']}),
    ]

    # The comments container is also used as the cut-off marker.
    remove_tags_after = [
        dict(name='div', attrs={'id': 'comments'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.descopera.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
55
resources/recipes/ecuisine.recipe
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
ecuisine.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class EcuisineRo(BasicNewsRecipe):
    """Calibre news recipe for ecuisine.ro (eCuisine)."""

    # --- Publication metadata -------------------------------------------
    title = u'eCuisine'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Reinventeaz\u0103 pl\u0103cerea de a g\u0103ti'
    publisher = 'eCuisine'
    category = 'Ziare,Retete,Bucatarie'
    language = 'ro'
    encoding = 'utf-8'
    # No cover image is known for this site. Use None (the "no cover URL"
    # value) rather than an empty string, which is not a fetchable URL.
    cover_url = None

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Page title and main content container are selected as content.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'page-title'}),
        dict(name='div', attrs={'class': 'content clearfix'}),
    ]

    # Tabs, rating, flags, social buttons and related-recipe blocks are
    # stripped. Each selector is listed once; the original listed the
    # 'fbshare' entry twice.
    remove_tags = [
        dict(name='ul', attrs={'id': ['recipe-tabs']}),
        dict(name='div', attrs={'class': ['recipe-body-rating clearfix']}),
        dict(name='div', attrs={'class': ['recipe-body-flags']}),
        dict(name='div', attrs={'id': ['tweetmeme_button']}),
        dict(name='div', attrs={'class': ['fbshare']}),
        dict(name='a', attrs={'class': ['button-rounded']}),
        dict(name='div', attrs={'class': ['recipe-body-related']}),
        dict(name='div', attrs={'class': ['link-wrapper']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.ecuisine.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
43
resources/recipes/egirl.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
egirl.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class EgirlRo(BasicNewsRecipe):
    """Calibre news recipe for egirl.ro."""

    # --- Publication metadata -------------------------------------------
    title = u'egirl'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Necesar pentru tine'
    publisher = u'egirl'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.egirl.ro/images/egirlNou/logo_egirl.gif'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Title block and styled content block are selected as content.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'title_art'}),
        dict(name='div', attrs={'class': 'content_style'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.egirl.ro/rss/egirl.xml'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
53
resources/recipes/fhmro.recipe
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
fhm.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class FHMro(BasicNewsRecipe):
    """Calibre news recipe for fhm.ro (FHM Romania)."""

    # --- Publication metadata -------------------------------------------
    title = u'FHM Ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Pentru c\u0103 noi putem'
    publisher = 'FHM'
    category = 'Reviste'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Title block and entry body are selected as content.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'contentMainTitle'}),
        dict(name='div', attrs={'class': 'entry'}),
    ]

    # NOTE(review): 'ratingblock ' carries a trailing space; presumably it
    # mirrors the site's markup exactly - confirm before normalising.
    remove_tags_after = [
        dict(name='div', attrs={'class': ['ratingblock ']}),
        dict(name='a', attrs={'rel': ['tag']}),
    ]

    # Rating widget and social-sharing container are stripped.
    remove_tags = [
        dict(name='div', attrs={'class': ['ratingblock ']}),
        dict(name='div', attrs={'class': ['socialize-containter']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.fhm.ro/feed'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
48
resources/recipes/go4it.recipe
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
go4it.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Go4ITro(BasicNewsRecipe):
    """Calibre news recipe for go4it.ro (news feed served via FeedBurner)."""

    # --- Publication metadata -------------------------------------------
    title = u'go4it'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Gadgeturi, Lifestyle, Tehnologie'
    publisher = 'go4it'
    category = 'Reviste,Ziare,IT'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.go4it.ro/images/logo.png'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Sub-title block and story body are selected as content.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'subTitle clearfix'}),
        dict(name='div', attrs={'class': 'story'}),
    ]

    # Date span and comments link are stripped.
    remove_tags = [
        dict(name='span', attrs={'class': ['data']}),
        dict(name='a', attrs={'class': ['comments']}),
    ]

    feeds = [
        (u'Feeds', u'http://feeds2.feedburner.com/Go4itro-Stiri'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
43
resources/recipes/hotcity.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
hotcity.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class HotcityRo(BasicNewsRecipe):
    """Calibre news recipe for hotcity.ro."""

    # --- Publication metadata -------------------------------------------
    title = u'Hotcity'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Cultura urban\u0103 feminin\u0103'
    publisher = 'Hotcity'
    category = 'Ziare,Reviste'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.hotcity.ro/i/bg_header.gif'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Article title block and text block are selected as content.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'articol_title'}),
        dict(name='div', attrs={'class': 'text'}),
    ]

    feeds = [
        (u'Feeds', u'http://www.hotcity.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
52
resources/recipes/intrefete.recipe
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
intrefete.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Intrefete(BasicNewsRecipe):
    """Calibre news recipe for intrefete.ro."""

    # --- Publication metadata -------------------------------------------
    title = u'\u00centre fete'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Petrece ziua cu stil, afl\u0103 ce e nou \u00eentre fete'
    publisher = u'Intre fete'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/2/1401/16788/5878693/5/logo.jpg?width=300'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # The article lives in a single container.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article'}),
    ]

    # Author box, tag list and non-scrolling iframes are stripped.
    remove_tags = [
        dict(name='div', attrs={'class': ['author']}),
        dict(name='div', attrs={'class': ['tags']}),
        dict(name='iframe', attrs={'scrolling': ['no']}),
    ]

    # The same non-scrolling iframe is also used as the cut-off marker.
    remove_tags_after = [
        dict(name='iframe', attrs={'scrolling': ['no']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.intrefete.ro/rss/'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
47
resources/recipes/kudika.recipe
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
kudika.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Kudika(BasicNewsRecipe):
    """Calibre news recipe for kudika.ro."""

    # --- Publication metadata -------------------------------------------
    title = u'Kudika'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Revist\u0103 pentru femei'
    publisher = 'Kudika'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://img.kudika.ro/images/template/page-logo.png'

    # --- Fetch behaviour ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Article header and main text container are selected as content.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'header_recommend_article'}),
        dict(name='div', attrs={'id': 'intertext_women'}),
    ]

    # Breadcrumbs, "standard" blocks and the recommendation box are stripped.
    remove_tags = [
        dict(name='p', attrs={'class': ['page_breadcrumbs']}),
        dict(name='div', attrs={'class': ['standard']}),
        dict(name='div', attrs={'id': ['recommend_allover']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.kudika.ro/feed.xml'),
    ]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
@ -1,7 +1,9 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Eddie Lau'
|
__copyright__ = '2010-2011, Eddie Lau'
|
||||||
'''
|
'''
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
|
||||||
|
clean up the indentation
|
||||||
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
|
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
|
||||||
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
|
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
|
||||||
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
|
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
|
||||||
@ -18,21 +20,21 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
|||||||
from contextlib import nested
|
from contextlib import nested
|
||||||
|
|
||||||
|
|
||||||
from calibre import __appname__
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
class MPHKRecipe(BasicNewsRecipe):
|
class MPHKRecipe(BasicNewsRecipe):
|
||||||
IsKindleUsed = True # to avoid generating periodical in which CJK characters can't be displayed in section/article view
|
IsCJKWellSupported = True # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view
|
||||||
|
|
||||||
title = 'Ming Pao - Hong Kong'
|
title = 'Ming Pao - Hong Kong'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'Eddie Lau'
|
__author__ = 'Eddie Lau'
|
||||||
description = 'Hong Kong Chinese Newspaper'
|
description = ('Hong Kong Chinese Newspaper (http://news.mingpao.com). If'
|
||||||
publisher = 'news.mingpao.com'
|
'you are using a Kindle with firmware < 3.1, customize the'
|
||||||
|
'recipe')
|
||||||
|
publisher = 'MingPao'
|
||||||
category = 'Chinese, News, Hong Kong'
|
category = 'Chinese, News, Hong Kong'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
@ -46,19 +48,20 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||||
keep_only_tags = [dict(name='h1'),
|
keep_only_tags = [dict(name='h1'),
|
||||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||||
dict(attrs={'class':['photo']}),
|
|
||||||
dict(attrs={'id':['newscontent']}), # entertainment page content
|
dict(attrs={'id':['newscontent']}), # entertainment page content
|
||||||
dict(attrs={'id':['newscontent01','newscontent02']})]
|
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||||
|
dict(attrs={'class':['photo']})
|
||||||
|
]
|
||||||
remove_tags = [dict(name='style'),
|
remove_tags = [dict(name='style'),
|
||||||
dict(attrs={'id':['newscontent135']})] # for the finance page
|
dict(attrs={'id':['newscontent135']})] # for the finance page
|
||||||
remove_attributes = ['width']
|
remove_attributes = ['width']
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: '<h1>'),
|
lambda match: '<h1>'),
|
||||||
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: '</h1>'),
|
lambda match: '</h1>'),
|
||||||
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
||||||
lambda match: '')
|
lambda match: '')
|
||||||
]
|
]
|
||||||
|
|
||||||
def image_url_processor(cls, baseurl, url):
|
def image_url_processor(cls, baseurl, url):
|
||||||
@ -107,6 +110,9 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
def get_fetchdate(self):
|
def get_fetchdate(self):
|
||||||
return self.get_dtlocal().strftime("%Y%m%d")
|
return self.get_dtlocal().strftime("%Y%m%d")
|
||||||
|
|
||||||
|
def get_fetchformatteddate(self):
|
||||||
|
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
def get_fetchday(self):
|
def get_fetchday(self):
|
||||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||||
return self.get_dtlocal().strftime("%d")
|
return self.get_dtlocal().strftime("%d")
|
||||||
@ -121,84 +127,66 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
return cover
|
return cover
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
dateStr = self.get_fetchdate()
|
dateStr = self.get_fetchdate()
|
||||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
|
||||||
(u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
|
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||||
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
|
||||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
|
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||||
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
||||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
|
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
|
||||||
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||||
articles = self.parse_section(url)
|
articles = self.parse_section(url)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
# special - finance
|
# special - finance
|
||||||
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||||
if fin_articles:
|
if fin_articles:
|
||||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||||
# special - eco-friendly
|
# special - entertainment
|
||||||
# eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
|
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||||
# if eco_articles:
|
if ent_articles:
|
||||||
# feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
|
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||||
# special - entertainment
|
return feeds
|
||||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
|
||||||
if ent_articles:
|
|
||||||
feeds.append((u'\u5f71\u8996 Entertainment', ent_articles))
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
def parse_section(self, url):
|
def parse_section(self, url):
|
||||||
dateStr = self.get_fetchdate()
|
dateStr = self.get_fetchdate()
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
||||||
current_articles = []
|
current_articles = []
|
||||||
included_urls = []
|
included_urls = []
|
||||||
divs.reverse()
|
divs.reverse()
|
||||||
for i in divs:
|
for i in divs:
|
||||||
a = i.find('a', href = True)
|
a = i.find('a', href = True)
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
url = a.get('href', False)
|
url = a.get('href', False)
|
||||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||||
included_urls.append(url)
|
included_urls.append(url)
|
||||||
current_articles.reverse()
|
current_articles.reverse()
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
def parse_fin_section(self, url):
|
def parse_fin_section(self, url):
|
||||||
dateStr = self.get_fetchdate()
|
dateStr = self.get_fetchdate()
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
a = soup.findAll('a', href= True)
|
a = soup.findAll('a', href= True)
|
||||||
current_articles = []
|
current_articles = []
|
||||||
for i in a:
|
|
||||||
url = i.get('href', False)
|
|
||||||
if not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
|
||||||
title = self.tag_to_string(i)
|
|
||||||
url = 'http://www.mpfinance.com/cfm/' +url
|
|
||||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
|
||||||
return current_articles
|
|
||||||
|
|
||||||
def parse_eco_section(self, url):
|
|
||||||
dateStr = self.get_fetchdate()
|
|
||||||
soup = self.index_to_soup(url)
|
|
||||||
divs = soup.findAll(attrs={'class': ['bullet']})
|
|
||||||
current_articles = []
|
|
||||||
included_urls = []
|
included_urls = []
|
||||||
for i in divs:
|
for i in a:
|
||||||
a = i.find('a', href = True)
|
url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||||
title = self.tag_to_string(a)
|
if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||||
url = a.get('href', False)
|
title = self.tag_to_string(i)
|
||||||
url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
|
|
||||||
if url not in included_urls and url.rfind('Redirect') == -1 and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1:
|
|
||||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||||
included_urls.append(url)
|
included_urls.append(url)
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
def parse_ent_section(self, url):
|
def parse_ent_section(self, url):
|
||||||
|
self.get_fetchdate()
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
a = soup.findAll('a', href=True)
|
a = soup.findAll('a', href=True)
|
||||||
a.reverse()
|
a.reverse()
|
||||||
@ -223,67 +211,71 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def create_opf(self, feeds, dir=None):
|
def create_opf(self, feeds, dir=None):
|
||||||
if self.IsKindleUsed == False:
|
|
||||||
super(MPHKRecipe,self).create_opf(feeds, dir)
|
|
||||||
return
|
|
||||||
if dir is None:
|
if dir is None:
|
||||||
dir = self.output_dir
|
dir = self.output_dir
|
||||||
title = self.short_title()
|
if self.IsCJKWellSupported == True:
|
||||||
title += ' ' + self.get_fetchdate()
|
# use Chinese title
|
||||||
#if self.output_profile.periodical_date_in_title:
|
title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
|
||||||
# title += strftime(self.timefmt)
|
else:
|
||||||
mi = MetaInformation(title, [__appname__])
|
# use English title
|
||||||
mi.publisher = __appname__
|
title = self.short_title() + ' ' + self.get_fetchformatteddate()
|
||||||
mi.author_sort = __appname__
|
if True: # force date in title
|
||||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
# title += strftime(self.timefmt)
|
||||||
#mi.timestamp = nowf()
|
mi = MetaInformation(title, [self.publisher])
|
||||||
mi.timestamp = self.get_dtlocal()
|
mi.publisher = self.publisher
|
||||||
mi.comments = self.description
|
mi.author_sort = self.publisher
|
||||||
if not isinstance(mi.comments, unicode):
|
if self.IsCJKWellSupported == True:
|
||||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||||
#mi.pubdate = nowf()
|
else:
|
||||||
mi.pubdate = self.get_dtlocal()
|
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||||
opf_path = os.path.join(dir, 'index.opf')
|
#mi.timestamp = nowf()
|
||||||
ncx_path = os.path.join(dir, 'index.ncx')
|
mi.timestamp = self.get_dtlocal()
|
||||||
opf = OPFCreator(dir, mi)
|
mi.comments = self.description
|
||||||
# Add mastheadImage entry to <guide> section
|
if not isinstance(mi.comments, unicode):
|
||||||
mp = getattr(self, 'masthead_path', None)
|
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||||
if mp is not None and os.access(mp, os.R_OK):
|
#mi.pubdate = nowf()
|
||||||
from calibre.ebooks.metadata.opf2 import Guide
|
mi.pubdate = self.get_dtlocal()
|
||||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
ref.type = 'masthead'
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
ref.title = 'Masthead Image'
|
opf = OPFCreator(dir, mi)
|
||||||
opf.guide.append(ref)
|
# Add mastheadImage entry to <guide> section
|
||||||
|
mp = getattr(self, 'masthead_path', None)
|
||||||
|
if mp is not None and os.access(mp, os.R_OK):
|
||||||
|
from calibre.ebooks.metadata.opf2 import Guide
|
||||||
|
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||||
|
ref.type = 'masthead'
|
||||||
|
ref.title = 'Masthead Image'
|
||||||
|
opf.guide.append(ref)
|
||||||
|
|
||||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||||
manifest.append(os.path.join(dir, 'index.html'))
|
manifest.append(os.path.join(dir, 'index.html'))
|
||||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||||
|
|
||||||
# Get cover
|
# Get cover
|
||||||
cpath = getattr(self, 'cover_path', None)
|
cpath = getattr(self, 'cover_path', None)
|
||||||
if cpath is None:
|
if cpath is None:
|
||||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||||
if self.default_cover(pf):
|
if self.default_cover(pf):
|
||||||
cpath = pf.name
|
cpath = pf.name
|
||||||
if cpath is not None and os.access(cpath, os.R_OK):
|
if cpath is not None and os.access(cpath, os.R_OK):
|
||||||
opf.cover = cpath
|
opf.cover = cpath
|
||||||
manifest.append(cpath)
|
manifest.append(cpath)
|
||||||
|
|
||||||
# Get masthead
|
# Get masthead
|
||||||
mpath = getattr(self, 'masthead_path', None)
|
mpath = getattr(self, 'masthead_path', None)
|
||||||
if mpath is not None and os.access(mpath, os.R_OK):
|
if mpath is not None and os.access(mpath, os.R_OK):
|
||||||
manifest.append(mpath)
|
manifest.append(mpath)
|
||||||
|
|
||||||
opf.create_manifest_from_files_in(manifest)
|
opf.create_manifest_from_files_in(manifest)
|
||||||
for mani in opf.manifest:
|
for mani in opf.manifest:
|
||||||
if mani.path.endswith('.ncx'):
|
if mani.path.endswith('.ncx'):
|
||||||
mani.id = 'ncx'
|
mani.id = 'ncx'
|
||||||
if mani.path.endswith('mastheadImage.jpg'):
|
if mani.path.endswith('mastheadImage.jpg'):
|
||||||
mani.id = 'masthead-image'
|
mani.id = 'masthead-image'
|
||||||
entries = ['index.html']
|
entries = ['index.html']
|
||||||
toc = TOC(base_path=dir)
|
toc = TOC(base_path=dir)
|
||||||
self.play_order_counter = 0
|
self.play_order_counter = 0
|
||||||
self.play_order_map = {}
|
self.play_order_map = {}
|
||||||
|
|
||||||
def feed_index(num, parent):
|
def feed_index(num, parent):
|
||||||
f = feeds[num]
|
f = feeds[num]
|
||||||
@ -321,7 +313,7 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||||
templ = self.navbar.generate(True, num, j, len(f),
|
templ = self.navbar.generate(True, num, j, len(f),
|
||||||
not self.has_single_feed,
|
not self.has_single_feed,
|
||||||
a.orig_url, __appname__, prefix=prefix,
|
a.orig_url, self.publisher, prefix=prefix,
|
||||||
center=self.center_navbar)
|
center=self.center_navbar)
|
||||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||||
body.insert(len(body.contents), elem)
|
body.insert(len(body.contents), elem)
|
||||||
@ -344,7 +336,7 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
if not desc:
|
if not desc:
|
||||||
desc = None
|
desc = None
|
||||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||||
f.title, play_order=po, description=desc, author=auth))
|
f.title, play_order=po, description=desc, author=auth))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
entries.append('feed_%d/index.html'%0)
|
entries.append('feed_%d/index.html'%0)
|
||||||
@ -357,4 +349,3 @@ class MPHKRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
opf.render(opf_file, ncx_file)
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
48
resources/recipes/nationalgeoro.recipe
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
natgeo.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class NationalGeoRo(BasicNewsRecipe):
    """Calibre news recipe for natgeo.ro, driven by the site's RSS feed."""

    # Publication metadata.
    title = u'National Geographic RO'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'S\u0103 avem grij\u0103 de planet\u0103'
    publisher = 'National Geographic'
    category = 'Reviste'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://wiki.benecke.com/images/c/c4/NatGeographic_Logo.jpg'

    # Download settings.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article heading and the article body are kept.
    keep_only_tags = [
        {'name': 'h2', 'attrs': {'class': 'contentheading clearfix'}},
        {'name': 'div', 'attrs': {'class': 'article-content'}},
    ]

    # Elements stripped from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['phocagallery']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.natgeo.ro/index.php?format=feed&type=rss'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
50
resources/recipes/osnews_pl.recipe
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
'''
|
||||||
|
OSNews.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class OSNewsRecipe(BasicNewsRecipe):
    """Calibre news recipe for OSnews.pl, fed from the site's FeedBurner feed."""

    __author__ = u'Mori & Tomasz D\u0142ugosz'
    language = 'pl'

    title = u'OSnews.pl'
    publisher = u'OSnews.pl'
    description = u'OSnews.pl jest spo\u0142eczno\u015bciowym serwisem informacyjnym po\u015bwi\u0119conym oprogramowaniu, systemom operacyjnym i \u015bwiatowi IT'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    # The stray trailing semicolon of the original statement has been dropped.
    use_embedded_content = False

    oldest_article = 7
    max_articles_per_feed = 100

    # Styling for the elements selected by keep_only_tags below.
    extra_css = '''
        .news-heading {font-size:150%}
        .newsinformations li {display:inline;}
        blockquote {border:2px solid #000; padding:5px;}
    '''

    feeds = [
        (u'OSNews.pl', u'http://feeds.feedburner.com/OSnewspl'),
    ]

    # Keep the headline link, the info list and the article body.
    keep_only_tags = [
        dict(name='a', attrs={'class': 'news-heading'}),
        dict(name='div', attrs={'class': 'newsinformations'}),
        dict(name='div', attrs={'id': 'news-content'}),
    ]

    # Blocks removed from the kept content.
    remove_tags = [
        dict(name='div', attrs={'class': 'sociable'}),
        dict(name='div', attrs={'class': 'post_prev'}),
        dict(name='div', attrs={'class': 'post_next'}),
        dict(name='div', attrs={'class': 'clr'}),
    ]

    # Removes the "Komentarze: (N)" text sitting between two spans.
    # The backslashes are doubled so the (non-raw) literal contains no invalid
    # escape sequences; the compiled pattern is the same as before.
    preprocess_regexps = [
        (re.compile(u'</span>Komentarze: \\(?[0-9]+\\)? ?<span'),
         lambda match: '</span><span'),
    ]
|
59
resources/recipes/romanialibera.recipe
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
romanialibera.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class RomaniaLibera(BasicNewsRecipe):
    """Calibre news recipe for romanialibera.ro, driven by the site's RSS feed."""

    # Publication metadata.
    title = u'Rom\u00e2nia Liber\u0103'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Rom\u00e2nia Liber\u0103'
    publisher = u'Rom\u00e2nia Liber\u0103'
    category = 'Ziare,Stiri'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.romanialibera.ro/templates/lilac/images/sigla_1.gif'

    # Download settings.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article container is kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'articol'}},
    ]

    # Elements stripped from the article container.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['art_actions']}},
        {'name': 'div', 'attrs': {'class': ['stats']}},
        {'name': 'div', 'attrs': {'class': ['data']}},
        {'name': 'div', 'attrs': {'class': ['autori']}},
        {'name': 'div', 'attrs': {'class': ['banda_explicatii_text']}},
        {'name': 'td', 'attrs': {'class': ['connect_widget_vertical_center connect_widget_button_cell']}},
        {'name': 'div', 'attrs': {'class': ['aceeasi_tema']}},
        {'name': 'div', 'attrs': {'class': ['art_after_text']}},
        {'name': 'div', 'attrs': {'class': ['navigare']}},
        {'name': 'div', 'attrs': {'id': ['art_text_left']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'art_after_text'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.romanialibera.ro/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
55
resources/recipes/sfin.recipe
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
sfin.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Sfin(BasicNewsRecipe):
    """Calibre news recipe for sfin.ro, driven by the site's RSS feed."""

    # Publication metadata.
    title = u'S\u0103pt\u0103m\u00e2na Financiar\u0103'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'SFIN'
    publisher = 'SFIN'
    category = 'Ziare,Stiri,Economie,Business'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://img.9am.ro/images/logo_surse/saptamana_financiara.gif'

    # Download settings.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Containers that are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'col2ContentLeft'}},
        {'name': 'div', 'attrs': {'id': 'contentArticol'}},
    ]

    # Elements stripped from the kept containers.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['infoArticol']}},
        {'name': 'div', 'attrs': {'class': ['separator']}},
        {'name': 'div', 'attrs': {'class': ['tags']}},
        {'name': 'div', 'attrs': {'id': ['comments']}},
        {'name': 'div', 'attrs': {'class': 'boxForm'}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'tags'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.sfin.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
51
resources/recipes/superbebe.recipe
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
superbebe.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Superbebe(BasicNewsRecipe):
    """Calibre news recipe for superbebe.ro, driven by the site's RSS feed."""

    # Publication metadata.
    title = u'Superbebe'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Superbebe'
    publisher = 'Superbebe'
    category = 'Ziare,Reviste,Bebe,Mamici'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.superbebe.ro/images/superbebe.gif'

    # Download settings.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article container is kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articol'}},
    ]

    # Elements stripped from the article container.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['info']}},
        {'name': 'div', 'attrs': {'class': ['tags']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['tags']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.superbebe.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
26
resources/recipes/swiatkindle.recipe
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||||
|
'''
|
||||||
|
swiatkindle.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class swiatkindle(BasicNewsRecipe):
    """Calibre news recipe for the swiatkindle.pl blog feed."""

    # Publication metadata.
    title = u'Swiat Kindle'
    __author__ = u'Tomasz D\u0142ugosz'
    description = u'Blog o czytniku Amazon Kindle. Wersje, ksi\u0105\u017cki, kupowanie i korzystanie w Polsce'
    language = 'pl'

    # Download settings.
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [
        (u'\u015awiat Kindle - wpisy', u'http://swiatkindle.pl/feed'),
    ]

    # The "similar posts" list is stripped from every article.
    remove_tags = [
        {'name': 'ul', 'attrs': {'class': 'similar-posts'}},
    ]

    # The "Czytaj dalej:" heading is deleted from the raw HTML.
    preprocess_regexps = [
        (re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: ''),
    ]
|
||||||
|
|
54
resources/recipes/tabu.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
tabu.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TabuRo(BasicNewsRecipe):
    """Calibre news recipe for tabu.ro, driven by the site's RSS feed."""

    # Publication metadata.
    title = u'Tabu'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Cel mai curajos site de femei'
    publisher = 'Tabu'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.tabu.ro/img/tabu-logo2.png'

    # Download settings.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Only the article container is kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'Article'}},
    ]

    # Elements stripped from the article container.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['advertisementArticle']}},
        {'name': 'div', 'attrs': {'class': 'voting_number'}},
        {'name': 'div', 'attrs': {'id': 'number_votes'}},
        {'name': 'div', 'attrs': {'id': 'rating_one'}},
        # NOTE(review): 'float: right;' looks like an inline style value
        # rather than a class name -- confirm against the page markup.
        {'name': 'div', 'attrs': {'class': 'float: right;'}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'id': 'comments'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.tabu.ro/rss_all.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
56
resources/recipes/unica.recipe
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||||
|
'''
|
||||||
|
unica.ro
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Unica(BasicNewsRecipe):
    """Calibre news recipe for unica.ro, driven by the site's RSS feed."""

    # Publication metadata.
    title = u'Unica'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Asa cum esti tu'
    publisher = 'Unica'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.unica.ro/fileadmin/images/logo.gif'

    # Download settings.
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Containers that are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'sticky'}},
        {'name': 'p', 'attrs': {'class': 'bodytext'}},
    ]

    # Elements stripped from the kept containers.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['top-links']}},
        {'name': 'div', 'attrs': {'id': ['autor_name']}},
        {'name': 'div', 'attrs': {'class': ['box-r']}},
        {'name': 'div', 'attrs': {'class': ['category']}},
        {'name': 'div', 'attrs': {'class': ['data']}},
    ]

    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': 'pager'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.unica.ro/rss.html'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed
|
@ -573,8 +573,8 @@ from calibre.devices.edge.driver import EDGE
|
|||||||
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
|
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
|
||||||
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH
|
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH
|
||||||
from calibre.devices.sne.driver import SNE
|
from calibre.devices.sne.driver import SNE
|
||||||
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
|
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
|
||||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600, LUMIREAD, ALURATEK_COLOR, \
|
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
|
||||||
TREKSTOR, EEEREADER, NEXTBOOK
|
TREKSTOR, EEEREADER, NEXTBOOK
|
||||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||||
from calibre.devices.kobo.driver import KOBO
|
from calibre.devices.kobo.driver import KOBO
|
||||||
@ -691,8 +691,6 @@ plugins += [
|
|||||||
AVANT,
|
AVANT,
|
||||||
MENTOR,
|
MENTOR,
|
||||||
SWEEX,
|
SWEEX,
|
||||||
Q600,
|
|
||||||
KOGAN,
|
|
||||||
PDNOVEL,
|
PDNOVEL,
|
||||||
SPECTRA,
|
SPECTRA,
|
||||||
GEMEI,
|
GEMEI,
|
||||||
|
@ -121,7 +121,8 @@ def enable_plugin(plugin_or_name):
|
|||||||
config['enabled_plugins'] = ep
|
config['enabled_plugins'] = ep
|
||||||
|
|
||||||
default_disabled_plugins = set([
|
default_disabled_plugins = set([
|
||||||
'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers'
|
'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
|
||||||
|
'Kent District Library'
|
||||||
])
|
])
|
||||||
|
|
||||||
def is_disabled(plugin):
|
def is_disabled(plugin):
|
||||||
|
@ -60,7 +60,12 @@ class ANDROID(USBMS):
|
|||||||
0x1004 : { 0x61cc : [0x100] },
|
0x1004 : { 0x61cc : [0x100] },
|
||||||
|
|
||||||
# Archos
|
# Archos
|
||||||
0x0e79 : { 0x1419: [0x0216], 0x1420 : [0x0216], 0x1422 : [0x0216]},
|
0x0e79 : {
|
||||||
|
0x1400 : [0x0222, 0x0216],
|
||||||
|
0x1419 : [0x0216],
|
||||||
|
0x1420 : [0x0216],
|
||||||
|
0x1422 : [0x0216]
|
||||||
|
},
|
||||||
|
|
||||||
# Huawei
|
# Huawei
|
||||||
# Disabled as this USB id is used by various USB flash drives
|
# Disabled as this USB id is used by various USB flash drives
|
||||||
@ -84,10 +89,10 @@ class ANDROID(USBMS):
|
|||||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||||
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
||||||
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
||||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']
|
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD', '7']
|
||||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||||
'A70S', 'A101IT']
|
'A70S', 'A101IT', '7']
|
||||||
|
|
||||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||||
|
|
||||||
|
@ -54,41 +54,24 @@ class AVANT(USBMS):
|
|||||||
class SWEEX(USBMS):
|
class SWEEX(USBMS):
|
||||||
# Identical to the Promedia
|
# Identical to the Promedia
|
||||||
name = 'Sweex Device Interface'
|
name = 'Sweex Device Interface'
|
||||||
gui_name = 'Sweex'
|
gui_name = 'Sweex/Kogan/Q600/Wink'
|
||||||
description = _('Communicate with the Sweex MM300')
|
description = _('Communicate with the Sweex/Kogan/Q600/Wink')
|
||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
|
||||||
# Ordered list of supported formats
|
# Ordered list of supported formats
|
||||||
FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
|
FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
|
||||||
|
|
||||||
VENDOR_ID = [0x0525, 0x177f]
|
VENDOR_ID = [0x0525, 0x177f]
|
||||||
PRODUCT_ID = [0xa4a5, 0x300]
|
PRODUCT_ID = [0xa4a5, 0x300]
|
||||||
BCD = [0x0319, 0x110]
|
BCD = [0x0319, 0x110, 0x325]
|
||||||
|
|
||||||
VENDOR_NAME = 'SWEEX'
|
VENDOR_NAME = ['SWEEX', 'LINUX']
|
||||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
|
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOKREADER', 'FILE-STOR_GADGET']
|
||||||
|
|
||||||
EBOOK_DIR_MAIN = ''
|
EBOOK_DIR_MAIN = ''
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
class Q600(SWEEX):
|
|
||||||
|
|
||||||
name = 'Digma Q600 Device interface'
|
|
||||||
gui_name = 'Q600'
|
|
||||||
description = _('Communicate with the Digma Q600')
|
|
||||||
|
|
||||||
BCD = [0x325]
|
|
||||||
FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
|
|
||||||
|
|
||||||
class KOGAN(SWEEX):
|
|
||||||
|
|
||||||
name = 'Kogan Device Interface'
|
|
||||||
gui_name = 'Kogan'
|
|
||||||
description = _('Communicate with the Kogan')
|
|
||||||
VENDOR_NAME = 'LINUX'
|
|
||||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
|
|
||||||
EBOOK_DIR_MAIN = 'Kogan eBooks'
|
|
||||||
|
|
||||||
class PDNOVEL(USBMS):
|
class PDNOVEL(USBMS):
|
||||||
name = 'Pandigital Novel device interface'
|
name = 'Pandigital Novel device interface'
|
||||||
|
@ -33,7 +33,7 @@ class HeuristicProcessor(object):
|
|||||||
self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}', re.IGNORECASE)
|
self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}', re.IGNORECASE)
|
||||||
self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
|
self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
|
||||||
self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
|
self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
|
||||||
self.single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
|
self.single_blank = re.compile(r'(\s*<(p|div)[^>]*>\s*</(p|div)>)', re.IGNORECASE)
|
||||||
self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
|
self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
|
||||||
self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]'
|
self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]'
|
||||||
self.common_in_text_beginnings = u'[\w\'\"“‘‛]'
|
self.common_in_text_beginnings = u'[\w\'\"“‘‛]'
|
||||||
@ -451,8 +451,8 @@ class HeuristicProcessor(object):
|
|||||||
return html
|
return html
|
||||||
|
|
||||||
def detect_whitespace(self, html):
|
def detect_whitespace(self, html):
|
||||||
blanks_around_headings = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
|
blanks_around_headings = re.compile(r'(?P<initparas>(<(p|div)[^>]*>\s*</(p|div)>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<(p|div)[^>]*>\s*</(p|div)>\s*){1,})?', re.IGNORECASE|re.DOTALL)
|
||||||
blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
|
blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE|re.DOTALL)
|
||||||
|
|
||||||
def merge_header_whitespace(match):
|
def merge_header_whitespace(match):
|
||||||
initblanks = match.group('initparas')
|
initblanks = match.group('initparas')
|
||||||
@ -485,6 +485,21 @@ class HeuristicProcessor(object):
|
|||||||
return html
|
return html
|
||||||
|
|
||||||
def detect_soft_breaks(self, html):
|
def detect_soft_breaks(self, html):
|
||||||
|
line = '(?P<initline>'+self.line_open+'\s*(?P<init_content>.*?)'+self.line_close+')'
|
||||||
|
line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+'\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
|
||||||
|
div_break_candidate_pattern = line+'\s*<div[^>]*>\s*</div>\s*'+line_two
|
||||||
|
div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE)
|
||||||
|
|
||||||
|
def convert_div_softbreaks(match):
|
||||||
|
init_is_paragraph = self.check_paragraph(match.group('init_content'))
|
||||||
|
line_two_is_paragraph = self.check_paragraph(match.group('line_two_content'))
|
||||||
|
if init_is_paragraph and line_two_is_paragraph:
|
||||||
|
return match.group('initline')+'\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>\n'+match.group('line_two')
|
||||||
|
else:
|
||||||
|
return match.group(0)
|
||||||
|
|
||||||
|
html = div_break_candidate.sub(convert_div_softbreaks, html)
|
||||||
|
|
||||||
if not self.blanks_deleted and self.blanks_between_paragraphs:
|
if not self.blanks_deleted and self.blanks_between_paragraphs:
|
||||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; text-align:center"> </p>', html)
|
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; text-align:center"> </p>', html)
|
||||||
else:
|
else:
|
||||||
@ -523,6 +538,14 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
return scene_break
|
return scene_break
|
||||||
|
|
||||||
|
def check_paragraph(self, content):
|
||||||
|
content = re.sub('\s*</?span[^>]*>\s*', '', content)
|
||||||
|
if re.match('.*[\"\'.!?:]$', content):
|
||||||
|
#print "detected this as a paragraph"
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
def abbyy_processor(self, html):
|
def abbyy_processor(self, html):
|
||||||
abbyy_line = re.compile('((?P<linestart><p\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE)
|
abbyy_line = re.compile('((?P<linestart><p\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE)
|
||||||
empty_paragraph = '\n<p> </p>\n'
|
empty_paragraph = '\n<p> </p>\n'
|
||||||
@ -530,14 +553,6 @@ class HeuristicProcessor(object):
|
|||||||
self.previous_was_paragraph = False
|
self.previous_was_paragraph = False
|
||||||
html = re.sub('</?a[^>]*>', '', html)
|
html = re.sub('</?a[^>]*>', '', html)
|
||||||
|
|
||||||
def check_paragraph(content):
|
|
||||||
content = re.sub('\s*</?span[^>]*>\s*', '', content)
|
|
||||||
if re.match('.*[\"\'.!?:]$', content):
|
|
||||||
#print "detected this as a paragraph"
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def convert_styles(match):
|
def convert_styles(match):
|
||||||
#print "raw styles are: "+match.group('styles')
|
#print "raw styles are: "+match.group('styles')
|
||||||
content = match.group('content')
|
content = match.group('content')
|
||||||
@ -565,7 +580,7 @@ class HeuristicProcessor(object):
|
|||||||
return blockquote_close_loop+'\n'+image+'\n'
|
return blockquote_close_loop+'\n'+image+'\n'
|
||||||
else:
|
else:
|
||||||
styles = match.group('styles').split(';')
|
styles = match.group('styles').split(';')
|
||||||
is_paragraph = check_paragraph(content)
|
is_paragraph = self.check_paragraph(content)
|
||||||
#print "styles for this line are: "+str(styles)
|
#print "styles for this line are: "+str(styles)
|
||||||
split_styles = []
|
split_styles = []
|
||||||
for style in styles:
|
for style in styles:
|
||||||
|
@ -350,6 +350,8 @@ class FB2MLizer(object):
|
|||||||
# Number of blank lines above tag
|
# Number of blank lines above tag
|
||||||
try:
|
try:
|
||||||
ems = int(round((float(style.marginTop) / style.fontSize) - 1))
|
ems = int(round((float(style.marginTop) / style.fontSize) - 1))
|
||||||
|
if ems < 0:
|
||||||
|
ems = 0
|
||||||
except:
|
except:
|
||||||
ems = 0
|
ems = 0
|
||||||
|
|
||||||
@ -397,7 +399,7 @@ class FB2MLizer(object):
|
|||||||
fb2_out += p_txt
|
fb2_out += p_txt
|
||||||
tags += p_tag
|
tags += p_tag
|
||||||
fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
|
fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
|
||||||
if tag in ('br', 'hr') or ems:
|
if tag in ('br', 'hr') or ems >= 1:
|
||||||
if ems < 1:
|
if ems < 1:
|
||||||
multiplier = 1
|
multiplier = 1
|
||||||
else:
|
else:
|
||||||
|
@ -205,7 +205,10 @@ def main(args=sys.argv):
|
|||||||
open(cpath, 'wb').write(br.open_novisit(curl).read())
|
open(cpath, 'wb').write(br.open_novisit(curl).read())
|
||||||
print 'Cover for', title, 'saved to', cpath
|
print 'Cover for', title, 'saved to', cpath
|
||||||
|
|
||||||
|
#import time
|
||||||
|
#st = time.time()
|
||||||
print get_social_metadata(title, None, None, isbn)
|
print get_social_metadata(title, None, None, isbn)
|
||||||
|
#print '\n\n', time.time() - st, '\n\n'
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@ -106,6 +106,9 @@ class MetadataSource(Plugin): # {{{
|
|||||||
def join(self):
|
def join(self):
|
||||||
return self.worker.join()
|
return self.worker.join()
|
||||||
|
|
||||||
|
def is_alive(self):
|
||||||
|
return self.worker.is_alive()
|
||||||
|
|
||||||
def is_customizable(self):
|
def is_customizable(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -251,7 +254,9 @@ class KentDistrictLibrary(MetadataSource): # {{{
|
|||||||
|
|
||||||
name = 'Kent District Library'
|
name = 'Kent District Library'
|
||||||
metadata_type = 'social'
|
metadata_type = 'social'
|
||||||
description = _('Downloads series information from ww2.kdl.org')
|
description = _('Downloads series information from ww2.kdl.org. '
|
||||||
|
'This website cannot handle large numbers of queries, '
|
||||||
|
'so the plugin is disabled by default.')
|
||||||
|
|
||||||
def fetch(self):
|
def fetch(self):
|
||||||
if not self.title or not self.book_author:
|
if not self.title or not self.book_author:
|
||||||
|
@ -5,7 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, urllib, urlparse
|
import re, urllib, urlparse, socket
|
||||||
|
|
||||||
|
from mechanize import URLError
|
||||||
|
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre import browser
|
from calibre import browser
|
||||||
@ -17,7 +19,7 @@ URL = \
|
|||||||
|
|
||||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||||
|
|
||||||
def get_series(title, authors):
|
def get_series(title, authors, timeout=60):
|
||||||
mi = Metadata(title, authors)
|
mi = Metadata(title, authors)
|
||||||
if title and title[0] in _ignore_starts:
|
if title and title[0] in _ignore_starts:
|
||||||
title = title[1:]
|
title = title[1:]
|
||||||
@ -39,7 +41,12 @@ def get_series(title, authors):
|
|||||||
|
|
||||||
url = URL.format(author, title)
|
url = URL.format(author, title)
|
||||||
br = browser()
|
br = browser()
|
||||||
raw = br.open(url).read()
|
try:
|
||||||
|
raw = br.open_novisit(url, timeout=timeout).read()
|
||||||
|
except URLError, e:
|
||||||
|
if isinstance(e.reason, socket.timeout):
|
||||||
|
raise Exception('KDL Server busy, try again later')
|
||||||
|
raise
|
||||||
if 'see the full results' not in raw:
|
if 'see the full results' not in raw:
|
||||||
return mi
|
return mi
|
||||||
raw = xml_to_unicode(raw)[0]
|
raw = xml_to_unicode(raw)[0]
|
||||||
|
@ -85,7 +85,8 @@ class Source(Plugin):
|
|||||||
|
|
||||||
# Metadata API {{{
|
# Metadata API {{{
|
||||||
|
|
||||||
def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
|
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||||
|
identifiers={}, timeout=5):
|
||||||
'''
|
'''
|
||||||
Identify a book by its title/author/isbn/etc.
|
Identify a book by its title/author/isbn/etc.
|
||||||
|
|
||||||
@ -98,6 +99,8 @@ class Source(Plugin):
|
|||||||
:param authors: A list of authors of the book, can be None
|
:param authors: A list of authors of the book, can be None
|
||||||
:param identifiers: A dictionary of other identifiers, most commonly
|
:param identifiers: A dictionary of other identifiers, most commonly
|
||||||
{'isbn':'1234...'}
|
{'isbn':'1234...'}
|
||||||
|
:param timeout: Timeout in seconds, no network request should hang for
|
||||||
|
longer than timeout.
|
||||||
:return: None if no errors occurred, otherwise a unicode representation
|
:return: None if no errors occurred, otherwise a unicode representation
|
||||||
of the error suitable for showing to the user
|
of the error suitable for showing to the user
|
||||||
|
|
||||||
|
@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import time
|
import time
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from threading import Thread
|
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -18,6 +17,7 @@ from calibre.ebooks.metadata.sources.base import Source
|
|||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.date import parse_date, utcnow
|
from calibre.utils.date import parse_date, utcnow
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
from calibre import browser, as_unicode
|
from calibre import browser, as_unicode
|
||||||
|
|
||||||
NAMESPACES = {
|
NAMESPACES = {
|
||||||
@ -41,20 +41,20 @@ subject = XPath('descendant::dc:subject')
|
|||||||
description = XPath('descendant::dc:description')
|
description = XPath('descendant::dc:description')
|
||||||
language = XPath('descendant::dc:language')
|
language = XPath('descendant::dc:language')
|
||||||
|
|
||||||
def get_details(browser, url):
|
def get_details(browser, url, timeout):
|
||||||
try:
|
try:
|
||||||
raw = browser.open_novisit(url).read()
|
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
gc = getattr(e, 'getcode', lambda : -1)
|
gc = getattr(e, 'getcode', lambda : -1)
|
||||||
if gc() != 403:
|
if gc() != 403:
|
||||||
raise
|
raise
|
||||||
# Google is throttling us, wait a little
|
# Google is throttling us, wait a little
|
||||||
time.sleep(2)
|
time.sleep(1)
|
||||||
raw = browser.open_novisit(url).read()
|
raw = browser.open_novisit(url, timeout=timeout).read()
|
||||||
|
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
def to_metadata(browser, log, entry_):
|
def to_metadata(browser, log, entry_, timeout):
|
||||||
|
|
||||||
def get_text(extra, x):
|
def get_text(extra, x):
|
||||||
try:
|
try:
|
||||||
@ -79,8 +79,9 @@ def to_metadata(browser, log, entry_):
|
|||||||
|
|
||||||
mi = Metadata(title_, authors)
|
mi = Metadata(title_, authors)
|
||||||
try:
|
try:
|
||||||
raw = get_details(browser, id_url)
|
raw = get_details(browser, id_url, timeout)
|
||||||
feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True)[0])
|
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
|
||||||
|
strip_encoding_pats=True)[0])
|
||||||
extra = entry(feed)[0]
|
extra = entry(feed)[0]
|
||||||
except:
|
except:
|
||||||
log.exception('Failed to get additional details for', mi.title)
|
log.exception('Failed to get additional details for', mi.title)
|
||||||
@ -131,26 +132,19 @@ def to_metadata(browser, log, entry_):
|
|||||||
|
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
class Worker(Thread):
|
|
||||||
|
|
||||||
def __init__(self, log, entries, abort, result_queue):
|
def get_all_details(br, log, entries, abort, result_queue, timeout):
|
||||||
self.browser, self.log, self.entries = browser(), log, entries
|
for i in entries:
|
||||||
self.abort, self.result_queue = abort, result_queue
|
try:
|
||||||
Thread.__init__(self)
|
ans = to_metadata(br, log, i, timeout)
|
||||||
self.daemon = True
|
if isinstance(ans, Metadata):
|
||||||
|
result_queue.put(ans)
|
||||||
def run(self):
|
except:
|
||||||
for i in self.entries:
|
log.exception(
|
||||||
try:
|
'Failed to get metadata for identify entry:',
|
||||||
ans = to_metadata(self.browser, self.log, i)
|
etree.tostring(i))
|
||||||
if isinstance(ans, Metadata):
|
if abort.is_set():
|
||||||
self.result_queue.put(ans)
|
break
|
||||||
except:
|
|
||||||
self.log.exception(
|
|
||||||
'Failed to get metadata for identify entry:',
|
|
||||||
etree.tostring(i))
|
|
||||||
if self.abort.is_set():
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleBooks(Source):
|
class GoogleBooks(Source):
|
||||||
@ -192,54 +186,40 @@ class GoogleBooks(Source):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
|
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||||
|
identifiers={}, timeout=5):
|
||||||
query = self.create_query(log, title=title, authors=authors,
|
query = self.create_query(log, title=title, authors=authors,
|
||||||
identifiers=identifiers)
|
identifiers=identifiers)
|
||||||
|
br = browser()
|
||||||
try:
|
try:
|
||||||
raw = browser().open_novisit(query).read()
|
raw = br.open_novisit(query, timeout=timeout).read()
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
log.exception('Failed to make identify query: %r'%query)
|
log.exception('Failed to make identify query: %r'%query)
|
||||||
return as_unicode(e)
|
return as_unicode(e)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = etree.XMLParser(recover=True, no_network=True)
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
feed = etree.fromstring(xml_to_unicode(raw,
|
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
|
||||||
strip_encoding_pats=True)[0], parser=parser)
|
strip_encoding_pats=True)[0], parser=parser)
|
||||||
entries = entry(feed)
|
entries = entry(feed)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
log.exception('Failed to parse identify results')
|
log.exception('Failed to parse identify results')
|
||||||
return as_unicode(e)
|
return as_unicode(e)
|
||||||
|
|
||||||
|
# There is no point running these queries in threads as google
|
||||||
groups = self.split_jobs(entries, 5) # At most 5 threads
|
# throttles requests returning Forbidden errors
|
||||||
if not groups:
|
get_all_details(br, log, entries, abort, result_queue, timeout)
|
||||||
return None
|
|
||||||
workers = [Worker(log, entries, abort, result_queue) for entries in
|
|
||||||
groups]
|
|
||||||
|
|
||||||
if abort.is_set():
|
|
||||||
return None
|
|
||||||
|
|
||||||
for worker in workers: worker.start()
|
|
||||||
|
|
||||||
has_alive_worker = True
|
|
||||||
while has_alive_worker and not abort.is_set():
|
|
||||||
time.sleep(0.1)
|
|
||||||
has_alive_worker = False
|
|
||||||
for worker in workers:
|
|
||||||
if worker.is_alive():
|
|
||||||
has_alive_worker = True
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
|
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
|
||||||
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
||||||
isbn_test)
|
title_test)
|
||||||
test_identify_plugin(GoogleBooks.name,
|
test_identify_plugin(GoogleBooks.name,
|
||||||
[
|
[
|
||||||
(
|
(
|
||||||
{'title': 'Great Expectations', 'authors':['Charles Dickens']},
|
{'title': 'Great Expectations', 'authors':['Charles Dickens']},
|
||||||
[isbn_test('9781607541592')]
|
[title_test('Great Expectations', exact=True)]
|
||||||
),
|
),
|
||||||
])
|
])
|
||||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, tempfile
|
import os, tempfile, time
|
||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
from threading import Event
|
from threading import Event
|
||||||
|
|
||||||
@ -26,6 +26,17 @@ def isbn_test(isbn):
|
|||||||
|
|
||||||
return test
|
return test
|
||||||
|
|
||||||
|
def title_test(title, exact=False):
|
||||||
|
|
||||||
|
title = title.lower()
|
||||||
|
|
||||||
|
def test(mi):
|
||||||
|
mt = mi.title.lower()
|
||||||
|
return (exact and mt == title) or \
|
||||||
|
(not exact and title in mt)
|
||||||
|
|
||||||
|
return test
|
||||||
|
|
||||||
def test_identify_plugin(name, tests):
|
def test_identify_plugin(name, tests):
|
||||||
'''
|
'''
|
||||||
:param name: Plugin name
|
:param name: Plugin name
|
||||||
@ -48,11 +59,15 @@ def test_identify_plugin(name, tests):
|
|||||||
abort = Event()
|
abort = Event()
|
||||||
prints('Log saved to', lf)
|
prints('Log saved to', lf)
|
||||||
|
|
||||||
|
times = []
|
||||||
for kwargs, test_funcs in tests:
|
for kwargs, test_funcs in tests:
|
||||||
prints('Running test with:', kwargs)
|
prints('Running test with:', kwargs)
|
||||||
rq = Queue()
|
rq = Queue()
|
||||||
args = (log, rq, abort)
|
args = (log, rq, abort)
|
||||||
|
start_time = time.time()
|
||||||
err = plugin.identify(*args, **kwargs)
|
err = plugin.identify(*args, **kwargs)
|
||||||
|
total_time = time.time() - start_time
|
||||||
|
times.append(total_time)
|
||||||
if err is not None:
|
if err is not None:
|
||||||
prints('identify returned an error for args', args)
|
prints('identify returned an error for args', args)
|
||||||
prints(err)
|
prints(err)
|
||||||
@ -87,6 +102,8 @@ def test_identify_plugin(name, tests):
|
|||||||
prints('Log saved to', lf)
|
prints('Log saved to', lf)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
|
prints('Average time per query', sum(times)/len(times))
|
||||||
|
|
||||||
if os.stat(lf).st_size > 10:
|
if os.stat(lf).st_size > 10:
|
||||||
prints('There were some errors, see log', lf)
|
prints('There were some errors, see log', lf)
|
||||||
|
|
||||||
|
@ -11,6 +11,12 @@ from calibre import browser
|
|||||||
|
|
||||||
class xISBN(object):
|
class xISBN(object):
|
||||||
|
|
||||||
|
'''
|
||||||
|
This class is used to find the ISBN numbers of "related" editions of a
|
||||||
|
book, given its ISBN. Useful when querying services for metadata by ISBN,
|
||||||
|
in case they do not have the ISBN for the particular edition.
|
||||||
|
'''
|
||||||
|
|
||||||
QUERY = 'http://xisbn.worldcat.org/webservices/xid/isbn/%s?method=getEditions&format=json&fl=form,year,lang,ed'
|
QUERY = 'http://xisbn.worldcat.org/webservices/xid/isbn/%s?method=getEditions&format=json&fl=form,year,lang,ed'
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -485,8 +485,8 @@ class MobiReader(object):
|
|||||||
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
|
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
|
||||||
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
|
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
|
||||||
self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
|
self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
|
||||||
self.processed_html = re.sub(r'(?i)(?P<blockquote>(</blockquote[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html)
|
self.processed_html = re.sub(r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html)
|
||||||
self.processed_html = re.sub(r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<blockquote[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
|
self.processed_html = re.sub(r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
|
||||||
|
|
||||||
|
|
||||||
def remove_random_bytes(self, html):
|
def remove_random_bytes(self, html):
|
||||||
|
@ -515,7 +515,7 @@ class Metadata(object):
|
|||||||
'publisher', 'relation', 'rights', 'source',
|
'publisher', 'relation', 'rights', 'source',
|
||||||
'subject', 'title', 'type'])
|
'subject', 'title', 'type'])
|
||||||
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
|
CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
|
||||||
'publication_type'])
|
'publication_type', 'title_sort'])
|
||||||
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
|
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
|
||||||
'scheme': OPF('scheme'), 'event': OPF('event'),
|
'scheme': OPF('scheme'), 'event': OPF('event'),
|
||||||
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
|
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
|
||||||
|
@ -18,7 +18,8 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
|
|||||||
if mi.title_sort:
|
if mi.title_sort:
|
||||||
if not m.title:
|
if not m.title:
|
||||||
m.add('title', mi.title_sort)
|
m.add('title', mi.title_sort)
|
||||||
m.title[0].file_as = mi.title_sort
|
m.clear('title_sort')
|
||||||
|
m.add('title_sort', mi.title_sort)
|
||||||
if not mi.is_null('authors'):
|
if not mi.is_null('authors'):
|
||||||
m.filter('creator', lambda x : x.role.lower() in ['aut', ''])
|
m.filter('creator', lambda x : x.role.lower() in ['aut', ''])
|
||||||
for a in mi.authors:
|
for a in mi.authors:
|
||||||
|
@ -259,6 +259,7 @@ class AddAction(InterfaceAction):
|
|||||||
if hasattr(self.gui, 'db_images'):
|
if hasattr(self.gui, 'db_images'):
|
||||||
self.gui.db_images.reset()
|
self.gui.db_images.reset()
|
||||||
self.gui.tags_view.recount()
|
self.gui.tags_view.recount()
|
||||||
|
|
||||||
if getattr(self._adder, 'merged_books', False):
|
if getattr(self._adder, 'merged_books', False):
|
||||||
books = u'\n'.join([x if isinstance(x, unicode) else
|
books = u'\n'.join([x if isinstance(x, unicode) else
|
||||||
x.decode(preferred_encoding, 'replace') for x in
|
x.decode(preferred_encoding, 'replace') for x in
|
||||||
@ -266,6 +267,17 @@ class AddAction(InterfaceAction):
|
|||||||
info_dialog(self.gui, _('Merged some books'),
|
info_dialog(self.gui, _('Merged some books'),
|
||||||
_('The following duplicate books were found and incoming book formats were '
|
_('The following duplicate books were found and incoming book formats were '
|
||||||
'processed and merged into your Calibre database according to your automerge settings:'), det_msg=books, show=True)
|
'processed and merged into your Calibre database according to your automerge settings:'), det_msg=books, show=True)
|
||||||
|
|
||||||
|
if getattr(self._adder, 'number_of_books_added', 0) > 0 or \
|
||||||
|
getattr(self._adder, 'merged_books', False):
|
||||||
|
# The formats of the current book could have changed if
|
||||||
|
# automerge is enabled
|
||||||
|
current_idx = self.gui.library_view.currentIndex()
|
||||||
|
if current_idx.isValid():
|
||||||
|
self.gui.library_view.model().current_changed(current_idx,
|
||||||
|
current_idx)
|
||||||
|
|
||||||
|
|
||||||
if getattr(self._adder, 'critical', None):
|
if getattr(self._adder, 'critical', None):
|
||||||
det_msg = []
|
det_msg = []
|
||||||
for name, log in self._adder.critical.items():
|
for name, log in self._adder.critical.items():
|
||||||
|
@ -254,7 +254,8 @@ class EditorWidget(QWebView): # {{{
|
|||||||
f = QFontInfo(QApplication.font(self)).pixelSize()
|
f = QFontInfo(QApplication.font(self)).pixelSize()
|
||||||
style = 'font-size: %dpx;' % (f,)
|
style = 'font-size: %dpx;' % (f,)
|
||||||
|
|
||||||
for body in self.page().mainFrame().documentElement().findAll('body'):
|
# toList() is needed because PyQt on Debian is old/broken
|
||||||
|
for body in self.page().mainFrame().documentElement().findAll('body').toList():
|
||||||
body.setAttribute('style', style)
|
body.setAttribute('style', style)
|
||||||
self.page().setContentEditable(True)
|
self.page().setContentEditable(True)
|
||||||
|
|
||||||
|
@ -1096,7 +1096,8 @@ class TagsModel(QAbstractItemModel): # {{{
|
|||||||
if tag in nodes_seen:
|
if tag in nodes_seen:
|
||||||
continue
|
continue
|
||||||
nodes_seen.add(tag)
|
nodes_seen.add(tag)
|
||||||
ans.append('%s%s:"=%s"'%(prefix, category, tag.name))
|
ans.append('%s%s:"=%s"'%(prefix, category,
|
||||||
|
tag.name.replace(r'"', r'\"')))
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def find_item_node(self, key, txt, start_path):
|
def find_item_node(self, key, txt, start_path):
|
||||||
|
@ -2356,7 +2356,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
rating = int(rating)
|
rating = int(rating)
|
||||||
self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,))
|
self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,))
|
||||||
rat = self.conn.get('SELECT id FROM ratings WHERE rating=?', (rating,), all=False)
|
rat = self.conn.get('SELECT id FROM ratings WHERE rating=?', (rating,), all=False)
|
||||||
rat = rat if rat else self.conn.execute('INSERT INTO ratings(rating) VALUES (?)', (rating,)).lastrowid
|
rat = rat if rat is not None else self.conn.execute('INSERT INTO ratings(rating) VALUES (?)', (rating,)).lastrowid
|
||||||
self.conn.execute('INSERT INTO books_ratings_link(book, rating) VALUES (?,?)', (id, rat))
|
self.conn.execute('INSERT INTO books_ratings_link(book, rating) VALUES (?,?)', (id, rat))
|
||||||
self.dirtied([id], commit=False)
|
self.dirtied([id], commit=False)
|
||||||
if commit:
|
if commit:
|
||||||
|
@ -313,24 +313,10 @@ class CategoryFeed(NavFeed):
|
|||||||
ignore_count = False
|
ignore_count = False
|
||||||
if which == 'search':
|
if which == 'search':
|
||||||
ignore_count = True
|
ignore_count = True
|
||||||
uc = None
|
|
||||||
if which.endswith(':'):
|
|
||||||
# We have a user category. Translate back to original categories
|
|
||||||
uc = {}
|
|
||||||
try:
|
|
||||||
ucs = db.prefs['user_categories']
|
|
||||||
ucs = ucs.get(which[:-1])
|
|
||||||
for name, category, index in ucs:
|
|
||||||
uc[name] = category
|
|
||||||
except:
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
uc = None
|
|
||||||
for item in items:
|
for item in items:
|
||||||
if uc: which = uc.get(item.name, which)
|
self.root.append(CATALOG_ENTRY(item, item.category, base_href, version,
|
||||||
self.root.append(CATALOG_ENTRY(item, which, base_href, version,
|
|
||||||
updated, ignore_count=ignore_count,
|
updated, ignore_count=ignore_count,
|
||||||
add_kind=uc is not None))
|
add_kind=which != item.category))
|
||||||
|
|
||||||
class CategoryGroupFeed(NavFeed):
|
class CategoryGroupFeed(NavFeed):
|
||||||
|
|
||||||
|