Add .text extension as a synonym for .txt. Various Romanian recipe by Silviu Cotoara

This commit is contained in:
Kovid Goyal 2011-03-09 09:27:03 -07:00
commit c021ea827a
18 changed files with 488 additions and 2 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 495 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 414 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 840 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 556 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 437 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 728 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 494 B

View File

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
cotidianul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Cotidianul(BasicNewsRecipe):
title = u'Cotidianul'
__author__ = u'Silviu Cotoar\u0103'
description = u''
publisher = u'Cotidianul'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri'
encoding = 'utf-8'
cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':'titlu'})
, dict(name='div', attrs={'class':'gallery clearfix'})
, dict(name='div', attrs={'align':'justify'})
]
remove_tags = [
dict(name='div', attrs={'class':['space']})
, dict(name='div', attrs={'id':['title_desc']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['space']})
, dict(name='span', attrs={'class':['date']})
]
feeds = [
(u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ele.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ele(BasicNewsRecipe):
title = u'Ele'
__author__ = u'Silviu Cotoar\u0103'
description = u'Dezv\u0103luie ceea ce e\u015fti'
publisher = u'Ele'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Femei'
encoding = 'utf-8'
cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='h1', attrs={'class':'article_title'})
, dict(name='div', attrs={'class':'article_text'})
]
feeds = [
(u'Feeds', u'http://www.ele.ro/rss_must_read')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
revistafelicia.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Felicia(BasicNewsRecipe):
title = u'Revista Felicia'
__author__ = u'Silviu Cotoar\u0103'
description = u'O revist\u0103 pentru sufletul t\u0103u'
publisher = u'Revista Felicia'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Reviste'
encoding = 'utf-8'
cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'header'})
, dict(name='div', attrs={'id':'contentArticol'})
]
remove_tags = [
dict(name='img',attrs={'src':['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']})
, dict(name='div',attrs={'class':['content']})
]
feeds = [
(u'Feeds', u'http://www.revistafelicia.ro/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
financiarul.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Financiarul(BasicNewsRecipe):
title = u'Financiarul'
__author__ = u'Silviu Cotoar\u0103'
description = u'FIN.ro'
publisher = u'Financiarul'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri'
encoding = 'utf-8'
cover_url = 'http://www.financiarul.com/templates/default/images/logo.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'col2ContentLeftL'})
]
remove_tags = [
dict(name='div',attrs={'class':['infoArticol']})
, dict(name='ul', attrs={'class':'navSectiuni'})
, dict(name='div', attrs={'class':'separator separatorTop'})
, dict(name='div', attrs={'class':'infoArticol infoArticolBottom'})
, dict(name='ul', attrs={'class':['related']})
, dict(name='div', attrs={'class':['slot panel300 panelGri300 panelGri300s panelGri300sm']})
]
remove_tags_after = [
dict(name='ul', attrs={'class':['related']})
]
feeds = [
(u'Feeds', u'http://www.financiarul.com/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
imperatortravel.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Imperatortravel(BasicNewsRecipe):
title = u'Imperator Travel'
__author__ = u'Silviu Cotoar\u0103'
description = u'C\u0103l\u0103torii'
publisher = u'Imperator Travel'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Turism,Calatorii'
encoding = 'utf-8'
cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':'article first_main_article'})
]
remove_tags = [
dict(name='div', attrs={'class':['meta']})
, dict(name='body', attrs={'class':['transparent_widget ff3 win Locale_en_US']})
, dict(name='div', attrs={'class':['connect_widget']})
, dict(name='ul', attrs={'class':['similar-posts']})
]
remove_tags_after = [
dict(name='ul', attrs={'class':['similar-posts']})
]
feeds = [
(u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
monden.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Monden(BasicNewsRecipe):
title = u'Monden'
__author__ = u'Silviu Cotoar\u0103'
description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
publisher = u'Monden'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Muzica'
encoding = 'utf-8'
cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':'content'})
]
remove_tags = [
dict(name='div', attrs={'class':['postAuthor']})
, dict(name='div', attrs={'class':['postLike']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['postLike']})
]
feeds = [
(u'Feeds', u'http://www.monden.info/feed/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
promotor.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Promotor(BasicNewsRecipe):
title = u'Promotor'
__author__ = u'Silviu Cotoar\u0103'
description = u'Auto-moto'
publisher = u'Promotor'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Reviste,TV,Auto'
encoding = 'utf-8'
cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':'casetatitluarticol'})
, dict(name='div', attrs={'style':'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'})
, dict(name='div', attrs={'class':'textb'})
, dict(name='div', attrs={'class':'contentarticol'})
]
remove_tags = [
dict(name='td', attrs={'class':['connect_widget_vertical_center connect_widget_button_cell']})
, dict(name='div', attrs={'class':['etichetagry']})
, dict(name='span', attrs={'class':['textb']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['etichetagry']})
, dict(name='span', attrs={'class':['textb']})
]
feeds = [
(u'Feeds', u'http://www.promotor.ro/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
timesnewroman.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TimesNewRoman(BasicNewsRecipe):
title = u'Times New Roman'
__author__ = u'Silviu Cotoar\u0103'
description = u'Cotidian independent de umor voluntar'
publisher = u'Times New Roman'
oldest_article = 25
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Reviste,Fun'
encoding = 'utf-8'
cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'page'})
]
remove_tags = [
dict(name='p', attrs={'class':['articleinfo']})
, dict(name='div',attrs={'class':['vergefacebooklike']})
, dict(name='div', attrs={'class':'cleared'})
]
remove_tags_after = [
dict(name='div', attrs={'class':'cleared'})
]
feeds = [
(u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -25,7 +25,7 @@ class DRMError(ValueError):
class ParserError(ValueError): class ParserError(ValueError):
pass pass
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm', BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb'] 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']

View File

@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
name = 'TXT Input' name = 'TXT Input'
author = 'John Schember' author = 'John Schember'
description = 'Convert TXT files to HTML' description = 'Convert TXT files to HTML'
file_types = set(['txt', 'txtz']) file_types = set(['txt', 'txtz', 'text'])
options = set([ options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto', OptionRecommendation(name='paragraph_type', recommended_value='auto',