Merge from trunk
BIN
resources/images/news/7seri.png
Normal file
After Width: | Height: | Size: 249 B |
BIN
resources/images/news/adevarul.png
Normal file
After Width: | Height: | Size: 401 B |
BIN
resources/images/news/aventurilapescuit.png
Normal file
After Width: | Height: | Size: 627 B |
BIN
resources/images/news/capital.png
Normal file
After Width: | Height: | Size: 617 B |
BIN
resources/images/news/catavencu.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
resources/images/news/chipro.png
Normal file
After Width: | Height: | Size: 181 B |
BIN
resources/images/news/csid.png
Normal file
After Width: | Height: | Size: 340 B |
BIN
resources/images/news/curierulnational.png
Normal file
After Width: | Height: | Size: 1.3 KiB |
BIN
resources/images/news/descopera.png
Normal file
After Width: | Height: | Size: 686 B |
BIN
resources/images/news/ecuisine.png
Normal file
After Width: | Height: | Size: 501 B |
BIN
resources/images/news/egirl.png
Normal file
After Width: | Height: | Size: 507 B |
BIN
resources/images/news/fhmro.png
Normal file
After Width: | Height: | Size: 836 B |
BIN
resources/images/news/gandul.png
Normal file
After Width: | Height: | Size: 527 B |
BIN
resources/images/news/go4it.png
Normal file
After Width: | Height: | Size: 827 B |
BIN
resources/images/news/gsp.png
Normal file
After Width: | Height: | Size: 367 B |
BIN
resources/images/news/hotcity.png
Normal file
After Width: | Height: | Size: 722 B |
BIN
resources/images/news/hotnews.png
Normal file
After Width: | Height: | Size: 722 B |
BIN
resources/images/news/intrefete.png
Normal file
After Width: | Height: | Size: 411 B |
BIN
resources/images/news/jurnalulnational.png
Normal file
After Width: | Height: | Size: 863 B |
BIN
resources/images/news/kudika.png
Normal file
After Width: | Height: | Size: 432 B |
BIN
resources/images/news/mediafax.png
Normal file
After Width: | Height: | Size: 657 B |
BIN
resources/images/news/moneyro.png
Normal file
After Width: | Height: | Size: 219 B |
BIN
resources/images/news/nationalgeoro.png
Normal file
After Width: | Height: | Size: 123 B |
BIN
resources/images/news/prosport.png
Normal file
After Width: | Height: | Size: 272 B |
BIN
resources/images/news/realitatea.png
Normal file
After Width: | Height: | Size: 4.0 KiB |
BIN
resources/images/news/romanialibera.png
Normal file
After Width: | Height: | Size: 222 B |
BIN
resources/images/news/sfin.png
Normal file
After Width: | Height: | Size: 229 B |
BIN
resources/images/news/standardmoney.png
Normal file
After Width: | Height: | Size: 510 B |
BIN
resources/images/news/superbebe.png
Normal file
After Width: | Height: | Size: 307 B |
BIN
resources/images/news/tabu.png
Normal file
After Width: | Height: | Size: 441 B |
BIN
resources/images/news/unica.png
Normal file
After Width: | Height: | Size: 327 B |
BIN
resources/images/news/ziarulfinanciar.png
Normal file
After Width: | Height: | Size: 1.9 KiB |
51
resources/recipes/7seri.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
sapteseri.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SapteSeri(BasicNewsRecipe):
    """calibre news recipe for sapteseri.ro (Sapte Seri)."""

    # --- publication metadata ---
    title = u'Sapte Seri'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Sapte Seri'
    publisher = u'Sapte Seri'
    category = 'Ziare,Oras,Distractie,Fun'
    language = 'ro'
    cover_url = 'http://www.sapteseri.ro/Images/logo.jpg'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True
    remove_javascript = True

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'id': 'title'}},
        {'name': 'div', 'attrs': {'class': 'mt10 mb10'}},
        {'name': 'div', 'attrs': {'class': 'mb20 mt10'}},
        {'name': 'div', 'attrs': {'class': 'mt5 mb20'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['entityimgworking']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (
            u'Ce se intampla azi in Bucuresti',
            u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/',
        ),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
51
resources/recipes/aventurilapescuit.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
aventurilapescuit.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AventuriLaPescuit(BasicNewsRecipe):
    """calibre news recipe for aventurilapescuit.ro."""

    # --- publication metadata ---
    title = u'Aventuri La Pescuit'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Aventuri La Pescuit'
    publisher = 'Aventuri La Pescuit'
    category = 'Ziare,Pescuit,Hobby'
    language = 'ro'
    cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'Article'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['right option']}},
        {'name': 'iframe', 'attrs': {'scrolling': ['no']}},
    ]

    # Marker element for remove_tags_after (same iframe spec as above).
    remove_tags_after = [
        {'name': 'iframe', 'attrs': {'scrolling': ['no']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.aventurilapescuit.ro/sections/rssread/1'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
52
resources/recipes/chipro.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
chip.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ChipRo(BasicNewsRecipe):
    """calibre news recipe for chip.ro (Chip Online)."""

    # --- publication metadata ---
    title = u'Chip Online'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Chip Online'
    publisher = 'Chip Online'
    category = 'Ziare,Reviste,IT'
    language = 'ro'
    cover_url = 'http://www.chip.ro/images/logo.png'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'h2', 'attrs': {'class': 'contentheading clearfix'}},
        {'name': 'span', 'attrs': {'class': 'createby'}},
        {'name': 'div', 'attrs': {'class': 'article-content'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['sharemecompactbutton']}},
        {'name': 'div', 'attrs': {'align': ['left']}},
        {'name': 'div', 'attrs': {'align': ['center']}},
        {'name': 'th', 'attrs': {'class': ['pagenav_prev']}},
        {'name': 'table', 'attrs': {'class': ['pagenav']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (
            u'Feeds',
            u'http://www.chip.ro/index.php?option=com_ninjarsssyndicator&feed_id=9&format=raw',
        ),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
52
resources/recipes/csid.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
csid.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CSID(BasicNewsRecipe):
    """calibre news recipe for csid.ro."""

    # --- publication metadata ---
    title = u'Ce se \u00eent\u00e2mpl\u0103 doctore?'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Ce se \u00eent\u00e2mpl\u0103 doctore?'
    publisher = 'CSID'
    category = 'Ziare,Reviste,Femei,Health,Beauty'
    language = 'ro'
    cover_url = 'http://www.csid.ro/images/default/csid.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'content floatleft'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['article_links']}},
        {'name': 'div', 'attrs': {'id': ['tags']}},
        {'name': 'p', 'attrs': {'id': ['tags']}},
    ]

    # Marker element for remove_tags_after.
    remove_tags_after = [
        {'name': 'p', 'attrs': {'id': ['tags']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.csid.ro/rss/'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
54
resources/recipes/curierulnational.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
curierulnational.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CurierulNal(BasicNewsRecipe):
    """calibre news recipe for curierulnational.ro (Curierul National)."""

    # --- publication metadata ---
    title = u'Curierul Na\u0163ional'
    __author__ = u'Silviu Cotoar\u0103'
    description = ''
    # The u prefix is required: without it a Python 2 byte string keeps
    # "\u0163" as six literal characters instead of the single letter,
    # so the publisher would not match the title above.
    publisher = u'Curierul Na\u0163ional'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.curierulnational.ro/logo.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'col1'}),
        dict(name='img', attrs={'id': 'placeholder'}),
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        dict(name='p', attrs={'id': ['alteArticole']}),
        dict(name='div', attrs={'id': ['textSize']}),
        dict(name='ul', attrs={'class': ['unit-rating']}),
        dict(name='div', attrs={'id': ['comments']}),
    ]

    # Marker element for remove_tags_after.
    remove_tags_after = [
        dict(name='ul', attrs={'class': 'unit-rating'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.curierulnational.ro/feed.xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
57
resources/recipes/descopera.recipe
Normal file
@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
descopera.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Descopera(BasicNewsRecipe):
    """calibre news recipe for descopera.ro."""

    # --- publication metadata ---
    title = u'Descoper\u0103'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'E lumea ta'
    publisher = 'Descopera'
    category = 'Ziare,Reviste,Descopera'
    language = 'ro'
    cover_url = 'http://www.descopera.ro/images/header_images/logo.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs selecting the article parts to keep.  The first two entries
    # match on the exact inline style string, so they must stay verbatim.
    keep_only_tags = [
        {
            'name': 'h1',
            'attrs': {'style': 'font-family: Arial,Helvetica,sans-serif; font-size: 18px; color: rgb(51, 51, 51); font-weight: bold; margin: 10px 0pt; clear: both; float: left;width: 610px;'},
        },
        {
            'name': 'div',
            'attrs': {'style': 'margin-right: 15px; margin-bottom: 15px; float: left;'},
        },
        {'name': 'p', 'attrs': {'id': 'itemDescription'}},
        {'name': 'div', 'attrs': {'id': 'itemBody'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['tools']}},
        {'name': 'div', 'attrs': {'class': ['share']}},
        {'name': 'div', 'attrs': {'class': ['category']}},
        {'name': 'div', 'attrs': {'id': ['comments']}},
    ]

    # Marker element for remove_tags_after.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'id': 'comments'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.descopera.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
55
resources/recipes/ecuisine.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
ecuisine.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EcuisineRo(BasicNewsRecipe):
    """calibre news recipe for ecuisine.ro (eCuisine)."""

    # --- publication metadata ---
    title = u'eCuisine'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Reinventeaz\u0103 pl\u0103cerea de a g\u0103ti'
    publisher = 'eCuisine'
    category = 'Ziare,Retete,Bucatarie'
    language = 'ro'
    # No cover image is known for this site.  None (rather than an empty
    # string) is used so the framework is not handed an empty URL to fetch.
    # NOTE(review): assumes BasicNewsRecipe treats None as "no cover" --
    # confirm against the calibre version in use.
    cover_url = None

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'page-title'}),
        dict(name='div', attrs={'class': 'content clearfix'}),
    ]

    # Tag specs to strip from the kept content.  The 'fbshare' spec used to
    # be listed twice; one entry is sufficient.
    remove_tags = [
        dict(name='ul', attrs={'id': ['recipe-tabs']}),
        dict(name='div', attrs={'class': ['recipe-body-rating clearfix']}),
        dict(name='div', attrs={'class': ['recipe-body-flags']}),
        dict(name='div', attrs={'id': ['tweetmeme_button']}),
        dict(name='div', attrs={'class': ['fbshare']}),
        dict(name='a', attrs={'class': ['button-rounded']}),
        dict(name='div', attrs={'class': ['recipe-body-related']}),
        dict(name='div', attrs={'class': ['link-wrapper']}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.ecuisine.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
43
resources/recipes/egirl.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
egirl.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EgirlRo(BasicNewsRecipe):
    """calibre news recipe for egirl.ro."""

    # --- publication metadata ---
    title = u'egirl'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Necesar pentru tine'
    publisher = u'egirl'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://www.egirl.ro/images/egirlNou/logo_egirl.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'title_art'}},
        {'name': 'div', 'attrs': {'class': 'content_style'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.egirl.ro/rss/egirl.xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
53
resources/recipes/fhmro.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
fhm.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FHMro(BasicNewsRecipe):
    """calibre news recipe for fhm.ro (FHM Ro)."""

    # --- publication metadata ---
    title = u'FHM Ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Pentru c\u0103 noi putem'
    publisher = 'FHM'
    category = 'Reviste'
    language = 'ro'
    cover_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'contentMainTitle'}},
        {'name': 'div', 'attrs': {'class': 'entry'}},
    ]

    # Marker elements for remove_tags_after.  The trailing space in
    # 'ratingblock ' is kept verbatim.
    # NOTE(review): presumably it mirrors the site's markup -- confirm.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['ratingblock ']}},
        {'name': 'a', 'attrs': {'rel': ['tag']}},
    ]

    # Tag specs to strip from the kept content ('socialize-containter' is
    # spelled exactly as in the original spec).
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['ratingblock ']}},
        {'name': 'div', 'attrs': {'class': ['socialize-containter']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.fhm.ro/feed'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
48
resources/recipes/go4it.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
go4it.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Go4ITro(BasicNewsRecipe):
    """calibre news recipe for go4it.ro."""

    # --- publication metadata ---
    title = u'go4it'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Gadgeturi, Lifestyle, Tehnologie'
    publisher = 'go4it'
    category = 'Reviste,Ziare,IT'
    language = 'ro'
    cover_url = 'http://www.go4it.ro/images/logo.png'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'subTitle clearfix'}},
        {'name': 'div', 'attrs': {'class': 'story'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'span', 'attrs': {'class': ['data']}},
        {'name': 'a', 'attrs': {'class': ['comments']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://feeds2.feedburner.com/Go4itro-Stiri'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
43
resources/recipes/hotcity.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
hotcity.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HotcityRo(BasicNewsRecipe):
    """calibre news recipe for hotcity.ro."""

    # --- publication metadata ---
    title = u'Hotcity'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Cultura urban\u0103 feminin\u0103'
    publisher = 'Hotcity'
    category = 'Ziare,Reviste'
    language = 'ro'
    cover_url = 'http://www.hotcity.ro/i/bg_header.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articol_title'}},
        {'name': 'div', 'attrs': {'class': 'text'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.hotcity.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
52
resources/recipes/intrefete.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
intrefete.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Intrefete(BasicNewsRecipe):
    """calibre news recipe for intrefete.ro."""

    # --- publication metadata ---
    title = u'\u00centre fete'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Petrece ziua cu stil, afl\u0103 ce e nou \u00eentre fete'
    publisher = u'Intre fete'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/2/1401/16788/5878693/5/logo.jpg?width=300'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['author']}},
        {'name': 'div', 'attrs': {'class': ['tags']}},
        {'name': 'iframe', 'attrs': {'scrolling': ['no']}},
    ]

    # Marker element for remove_tags_after (same iframe spec as above).
    remove_tags_after = [
        {'name': 'iframe', 'attrs': {'scrolling': ['no']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.intrefete.ro/rss/'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
47
resources/recipes/kudika.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
kudika.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Kudika(BasicNewsRecipe):
    """calibre news recipe for kudika.ro."""

    # --- publication metadata ---
    title = u'Kudika'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Revist\u0103 pentru femei'
    publisher = 'Kudika'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://img.kudika.ro/images/template/page-logo.png'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'header_recommend_article'}},
        {'name': 'div', 'attrs': {'id': 'intertext_women'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'p', 'attrs': {'class': ['page_breadcrumbs']}},
        {'name': 'div', 'attrs': {'class': ['standard']}},
        {'name': 'div', 'attrs': {'id': ['recommend_allover']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.kudika.ro/feed.xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
48
resources/recipes/nationalgeoro.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
natgeo.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NationalGeoRo(BasicNewsRecipe):
    """calibre news recipe for natgeo.ro (National Geographic RO)."""

    # --- publication metadata ---
    title = u'National Geographic RO'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'S\u0103 avem grij\u0103 de planet\u0103'
    publisher = 'National Geographic'
    category = 'Reviste'
    language = 'ro'
    cover_url = 'http://wiki.benecke.com/images/c/c4/NatGeographic_Logo.jpg'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'h2', 'attrs': {'class': 'contentheading clearfix'}},
        {'name': 'div', 'attrs': {'class': 'article-content'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['phocagallery']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.natgeo.ro/index.php?format=feed&type=rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
59
resources/recipes/romanialibera.recipe
Normal file
@ -0,0 +1,59 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
romanialibera.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RomaniaLibera(BasicNewsRecipe):
    """calibre news recipe for romanialibera.ro."""

    # --- publication metadata ---
    title = u'Rom\u00e2nia Liber\u0103'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Rom\u00e2nia Liber\u0103'
    publisher = u'Rom\u00e2nia Liber\u0103'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.romanialibera.ro/templates/lilac/images/sigla_1.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'articol'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['art_actions']}},
        {'name': 'div', 'attrs': {'class': ['stats']}},
        {'name': 'div', 'attrs': {'class': ['data']}},
        {'name': 'div', 'attrs': {'class': ['autori']}},
        {'name': 'div', 'attrs': {'class': ['banda_explicatii_text']}},
        {
            'name': 'td',
            'attrs': {'class': ['connect_widget_vertical_center connect_widget_button_cell']},
        },
        {'name': 'div', 'attrs': {'class': ['aceeasi_tema']}},
        {'name': 'div', 'attrs': {'class': ['art_after_text']}},
        {'name': 'div', 'attrs': {'class': ['navigare']}},
        {'name': 'div', 'attrs': {'id': ['art_text_left']}},
    ]

    # Marker element for remove_tags_after.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'art_after_text'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.romanialibera.ro/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
55
resources/recipes/sfin.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
sfin.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Sfin(BasicNewsRecipe):
    """calibre news recipe for sfin.ro (Saptamana Financiara)."""

    # --- publication metadata ---
    title = u'S\u0103pt\u0103m\u00e2na Financiar\u0103'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'SFIN'
    publisher = 'SFIN'
    category = 'Ziare,Stiri,Economie,Business'
    language = 'ro'
    cover_url = 'http://img.9am.ro/images/logo_surse/saptamana_financiara.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'col2ContentLeft'}},
        {'name': 'div', 'attrs': {'id': 'contentArticol'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['infoArticol']}},
        {'name': 'div', 'attrs': {'class': ['separator']}},
        {'name': 'div', 'attrs': {'class': ['tags']}},
        {'name': 'div', 'attrs': {'id': ['comments']}},
        {'name': 'div', 'attrs': {'class': 'boxForm'}},
    ]

    # Marker element for remove_tags_after.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'tags'}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.sfin.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
51
resources/recipes/superbebe.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
superbebe.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Superbebe(BasicNewsRecipe):
    """calibre news recipe for superbebe.ro."""

    # --- publication metadata ---
    title = u'Superbebe'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Superbebe'
    publisher = 'Superbebe'
    category = 'Ziare,Reviste,Bebe,Mamici'
    language = 'ro'
    cover_url = 'http://www.superbebe.ro/images/superbebe.gif'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articol'}},
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['info']}},
        {'name': 'div', 'attrs': {'class': ['tags']}},
    ]

    # Marker element for remove_tags_after.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['tags']}},
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.superbebe.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
@ -8,6 +8,8 @@ swiatkindle.pl
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class swiatkindle(BasicNewsRecipe):
|
||||
title = u'Swiat Kindle'
|
||||
description = u'Blog o czytniku Amazon Kindle. Wersje, ksi\u0105\u017cki, kupowanie i korzystanie w Polsce'
|
||||
|
54
resources/recipes/tabu.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
tabu.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TabuRo(BasicNewsRecipe):
    """calibre news recipe for tabu.ro."""

    # --- publication metadata ---
    title = u'Tabu'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Cel mai curajos site de femei'
    publisher = 'Tabu'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://www.tabu.ro/img/tabu-logo2.png'

    # --- fetch / clean-up settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specs (name + attrs) selecting the article parts to keep.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'Article'}),
    ]

    # Tag specs to strip from the kept content.
    remove_tags = [
        dict(name='div', attrs={'id': ['advertisementArticle']}),
        dict(name='div', attrs={'class': 'voting_number'}),
        dict(name='div', attrs={'id': 'number_votes'}),
        dict(name='div', attrs={'id': 'rating_one'}),
        # 'float: right;' is a CSS declaration, so it is matched against the
        # inline style attribute; it was previously (mis)matched against
        # 'class'.
        # NOTE(review): verify against the live page that only unwanted
        # divs carry exactly this inline style.
        dict(name='div', attrs={'style': 'float: right;'}),
    ]

    # Marker element for remove_tags_after.
    remove_tags_after = [
        dict(name='div', attrs={'id': 'comments'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.tabu.ro/rss_all.xml'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
56
resources/recipes/unica.recipe
Normal file
@ -0,0 +1,56 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
unica.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Unica(BasicNewsRecipe):
    """Recipe for the Romanian site unica.ro, fed from its single RSS feed."""

    # --- Descriptive metadata -------------------------------------------
    title = u'Unica'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Asa cum esti tu'
    publisher = 'Unica'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://www.unica.ro/fileadmin/images/logo.gif'

    # --- Fetch settings --------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Mirror the descriptive metadata into the conversion options.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Elements kept: the div with id "sticky" and p elements of class
    # "bodytext".
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'sticky'}},
        {'name': 'p', 'attrs': {'class': 'bodytext'}},
    ]

    # Divs stripped from the article, selected by class or id.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['top-links']}},
        {'name': 'div', 'attrs': {'id': ['autor_name']}},
        {'name': 'div', 'attrs': {'class': ['box-r']}},
        {'name': 'div', 'attrs': {'class': ['category']}},
        {'name': 'div', 'attrs': {'class': ['data']}},
    ]

    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': 'pager'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.unica.ro/rss.html'),
    ]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
@ -33,7 +33,7 @@ class HeuristicProcessor(object):
|
||||
self.any_multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>(\s*<div[^>]*>\s*</div>\s*)*){2,}', re.IGNORECASE)
|
||||
self.line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
|
||||
self.line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
|
||||
self.single_blank = re.compile(r'(\s*<p[^>]*>\s*</p>)', re.IGNORECASE)
|
||||
self.single_blank = re.compile(r'(\s*<(p|div)[^>]*>\s*</(p|div)>)', re.IGNORECASE)
|
||||
self.scene_break_open = '<p class="scenebreak" style="text-align:center; text-indent:0%; margin-top:1em; margin-bottom:1em; page-break-before:avoid">'
|
||||
self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]'
|
||||
self.common_in_text_beginnings = u'[\w\'\"“‘‛]'
|
||||
@ -451,8 +451,8 @@ class HeuristicProcessor(object):
|
||||
return html
|
||||
|
||||
def detect_whitespace(self, html):
|
||||
blanks_around_headings = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
|
||||
blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE)
|
||||
blanks_around_headings = re.compile(r'(?P<initparas>(<(p|div)[^>]*>\s*</(p|div)>\s*){1,}\s*)?(?P<heading><h(?P<hnum>\d+)[^>]*>.*?</h(?P=hnum)>)(?P<endparas>\s*(<(p|div)[^>]*>\s*</(p|div)>\s*){1,})?', re.IGNORECASE|re.DOTALL)
|
||||
blanks_n_nopunct = re.compile(r'(?P<initparas>(<p[^>]*>\s*</p>\s*){1,}\s*)?<p[^>]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W](</(span|[ibu]|em|strong|font)>\s*)*</p>(?P<endparas>\s*(<p[^>]*>\s*</p>\s*){1,})?', re.IGNORECASE|re.DOTALL)
|
||||
|
||||
def merge_header_whitespace(match):
|
||||
initblanks = match.group('initparas')
|
||||
@ -485,6 +485,21 @@ class HeuristicProcessor(object):
|
||||
return html
|
||||
|
||||
def detect_soft_breaks(self, html):
|
||||
line = '(?P<initline>'+self.line_open+'\s*(?P<init_content>.*?)'+self.line_close+')'
|
||||
line_two = '(?P<line_two>'+re.sub('(ou|in|cha)', 'linetwo_', self.line_open)+'\s*(?P<line_two_content>.*?)'+re.sub('(ou|in|cha)', 'linetwo_', self.line_close)+')'
|
||||
div_break_candidate_pattern = line+'\s*<div[^>]*>\s*</div>\s*'+line_two
|
||||
div_break_candidate = re.compile(r'%s' % div_break_candidate_pattern, re.IGNORECASE|re.UNICODE)
|
||||
|
||||
def convert_div_softbreaks(match):
|
||||
init_is_paragraph = self.check_paragraph(match.group('init_content'))
|
||||
line_two_is_paragraph = self.check_paragraph(match.group('line_two_content'))
|
||||
if init_is_paragraph and line_two_is_paragraph:
|
||||
return match.group('initline')+'\n<p class="softbreak" style="margin-top:.5em; page-break-before:avoid; text-align:center"> </p>\n'+match.group('line_two')
|
||||
else:
|
||||
return match.group(0)
|
||||
|
||||
html = div_break_candidate.sub(convert_div_softbreaks, html)
|
||||
|
||||
if not self.blanks_deleted and self.blanks_between_paragraphs:
|
||||
html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1em; page-break-before:avoid; text-align:center"> </p>', html)
|
||||
else:
|
||||
@ -523,6 +538,14 @@ class HeuristicProcessor(object):
|
||||
|
||||
return scene_break
|
||||
|
||||
def check_paragraph(self, content):
    """Report whether *content* ends like a finished paragraph.

    Opening and closing ``span`` tags (with any whitespace around them)
    are removed first; the result is True when the remaining text ends
    in a double quote, single quote, period, exclamation mark, question
    mark or colon, and False otherwise.
    """
    without_spans = re.sub('\s*</?span[^>]*>\s*', '', content)
    return re.match('.*[\"\'.!?:]$', without_spans) is not None
|
||||
|
||||
def abbyy_processor(self, html):
|
||||
abbyy_line = re.compile('((?P<linestart><p\sstyle="(?P<styles>[^\"]*?);?">)(?P<content>.*?)(?P<lineend></p>)|(?P<image><img[^>]*>))', re.IGNORECASE)
|
||||
empty_paragraph = '\n<p> </p>\n'
|
||||
@ -530,14 +553,6 @@ class HeuristicProcessor(object):
|
||||
self.previous_was_paragraph = False
|
||||
html = re.sub('</?a[^>]*>', '', html)
|
||||
|
||||
def check_paragraph(content):
|
||||
content = re.sub('\s*</?span[^>]*>\s*', '', content)
|
||||
if re.match('.*[\"\'.!?:]$', content):
|
||||
#print "detected this as a paragraph"
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def convert_styles(match):
|
||||
#print "raw styles are: "+match.group('styles')
|
||||
content = match.group('content')
|
||||
@ -565,7 +580,7 @@ class HeuristicProcessor(object):
|
||||
return blockquote_close_loop+'\n'+image+'\n'
|
||||
else:
|
||||
styles = match.group('styles').split(';')
|
||||
is_paragraph = check_paragraph(content)
|
||||
is_paragraph = self.check_paragraph(content)
|
||||
#print "styles for this line are: "+str(styles)
|
||||
split_styles = []
|
||||
for style in styles:
|
||||
|
@ -485,8 +485,8 @@ class MobiReader(object):
|
||||
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
|
||||
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', '\g<styletags>'+'\g<para>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<blockquote>(</blockquote[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<blockquote[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', '\g<para>'+'\g<blockquote>', self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
|
||||
|
||||
|
||||
def remove_random_bytes(self, html):
|
||||
|
@ -254,7 +254,8 @@ class EditorWidget(QWebView): # {{{
|
||||
f = QFontInfo(QApplication.font(self)).pixelSize()
|
||||
style = 'font-size: %dpx;' % (f,)
|
||||
|
||||
for body in self.page().mainFrame().documentElement().findAll('body'):
|
||||
# toList() is needed because PyQt on Debian is old/broken
|
||||
for body in self.page().mainFrame().documentElement().findAll('body').toList():
|
||||
body.setAttribute('style', style)
|
||||
self.page().setContentEditable(True)
|
||||
|
||||
|