Pull from trunk

This commit is contained in:
Kovid Goyal 2009-02-07 14:08:35 -08:00
commit cbdf0ea09e
49 changed files with 1014 additions and 418 deletions

View File

@ -299,7 +299,6 @@ File ::2BCD9281-2CBC-CF0D-0E12-2CE11F6ED758 -name comic2epub.exe.local -parent 8
File ::EDE6F457-C83F-C5FA-9AF4-38FDFF17D929 -name PIL._imagingtk.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::EDE6F457-C83F-C5FA-9AF4-38FDFF17D929 -name PIL._imagingtk.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::09D0906E-3611-3DB7-32CF-A140585694A7 -name win32pdh.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::09D0906E-3611-3DB7-32CF-A140585694A7 -name win32pdh.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::4C84F0DC-7157-0C90-2062-180139B03E25 -name IM_MOD_RL_rgb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::4C84F0DC-7157-0C90-2062-180139B03E25 -name IM_MOD_RL_rgb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::F402F507-87C5-BDB1-80AE-AD3FF4A4BCE7 -name bzrlib._patiencediff_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::A732EDE7-4796-241F-BECA-68E59F88F8AF -name lrs2lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::A732EDE7-4796-241F-BECA-68E59F88F8AF -name lrs2lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::69072379-7D16-B9F7-9F39-3E6403C48267 -name IM_MOD_RL_xbm_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::69072379-7D16-B9F7-9F39-3E6403C48267 -name IM_MOD_RL_xbm_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::FBD11D98-D1E7-5DD9-BF02-01CE92518859 -name IM_MOD_RL_otb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::FBD11D98-D1E7-5DD9-BF02-01CE92518859 -name IM_MOD_RL_otb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
@ -365,7 +364,6 @@ File ::26741B21-C241-E100-8BB1-8B679BC3E662 -name configure.xml -parent 8E5D85A4
File ::7D491E89-C6D3-1E6E-F4BD-8E55260FE33E -name libexpat.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::7D491E89-C6D3-1E6E-F4BD-8E55260FE33E -name libexpat.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::A4910EB3-0F1C-F6F0-CD2D-16A64BBAA92B -name calibre-fontconfig.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::A4910EB3-0F1C-F6F0-CD2D-16A64BBAA92B -name calibre-fontconfig.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::8711327A-716D-B162-6AC6-2FB4AD071266 -name fb22lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::8711327A-716D-B162-6AC6-2FB4AD071266 -name fb22lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::0FDD3A7A-31F3-8089-CE32-D80EAA6F62B2 -name bzrlib._btree_serializer_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::476CB977-5155-D56F-26CA-EB243AEBBA99 -name unrar.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::476CB977-5155-D56F-26CA-EB243AEBBA99 -name unrar.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::2DA1CC8D-AF5C-3B03-2060-301DFE0356CC -name mobi2oeb.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::2DA1CC8D-AF5C-3B03-2060-301DFE0356CC -name mobi2oeb.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::2E2A9EDA-5386-444E-8479-557386794552 -name IM_MOD_RL_uil_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::2E2A9EDA-5386-444E-8479-557386794552 -name IM_MOD_RL_uil_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
@ -487,7 +485,6 @@ File ::AA761ACD-B728-2324-AA75-B20A2A79F125 -name lrf2lrs.exe -parent 8E5D85A4-7
File ::95434C76-22F5-B9CE-6194-6E1B1EE3232D -name IM_MOD_RL_info_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::95434C76-22F5-B9CE-6194-6E1B1EE3232D -name IM_MOD_RL_info_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::AAF45D03-322F-5553-63A7-312DB754A20B -name _ctypes.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::AAF45D03-322F-5553-63A7-312DB754A20B -name _ctypes.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::C3D351CA-A8D8-AB35-55D9-5AACF8DB37D1 -name python26.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::C3D351CA-A8D8-AB35-55D9-5AACF8DB37D1 -name python26.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::2F90B52F-A728-2CA4-5688-0283674695B7 -name _elementtree.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::B50B66A1-FB65-FAD5-1DD7-E894ACC07464 -name QtSvg4.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::B50B66A1-FB65-FAD5-1DD7-E894ACC07464 -name QtSvg4.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::906FF13D-D993-7192-7EA5-6D15A5A24BFB -name CORE_RL_png_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::906FF13D-D993-7192-7EA5-6D15A5A24BFB -name CORE_RL_png_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
File ::5D368661-6BF0-D6AF-7C1A-87646864EB4B -name delegates.xml -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::5D368661-6BF0-D6AF-7C1A-87646864EB4B -name delegates.xml -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
@ -552,7 +549,7 @@ SetupType ::D9ADE41C-B744-690C-2CED-CF826BF03D2E -setup Install -active Yes -pla
InstallComponent 3EA07B17-04D8-6508-B535-96CC7173B49A -setup Install -type pane -conditions D7F585DB-0DEC-A94E-DAB0-94D558D82764 -title {Welcome Screen} -component Welcome -command insert -active Yes -parent StandardInstall InstallComponent 3EA07B17-04D8-6508-B535-96CC7173B49A -setup Install -type pane -conditions D7F585DB-0DEC-A94E-DAB0-94D558D82764 -title {Welcome Screen} -component Welcome -command insert -active Yes -parent StandardInstall
Condition D7F585DB-0DEC-A94E-DAB0-94D558D82764 -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id D7F585DB-0DEC-A94E-DAB0-94D558D82764 Condition D7F585DB-0DEC-A94E-DAB0-94D558D82764 -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id D7F585DB-0DEC-A94E-DAB0-94D558D82764
InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command reorder -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
Condition ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -active Yes -parent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E Condition ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -active Yes -parent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E
InstallComponent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -setup Install -type action -conditions 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -title {Set Virtual Text} -component SetVirtualText -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A InstallComponent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -setup Install -type action -conditions 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -title {Set Virtual Text} -component SetVirtualText -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
Condition 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -active Yes -parent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -title {String Is Condition} -component StringIsCondition -TreeObject::id 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB Condition 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -active Yes -parent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -title {String Is Condition} -component StringIsCondition -TreeObject::id 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB

View File

@ -12,7 +12,7 @@ LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
PDFTOHTML = 'C:\\pdftohtml\\pdftohtml.exe' PDFTOHTML = 'C:\\pdftohtml\\pdftohtml.exe'
IMAGEMAGICK_DIR = 'C:\\ImageMagick' IMAGEMAGICK_DIR = 'C:\\ImageMagick'
FONTCONFIG_DIR = 'C:\\fontconfig' FONTCONFIG_DIR = 'C:\\fontconfig'
VC90 = r'C:\Program Files\Microsoft Visual Studio 9.0\VC\redist\x86\Microsoft.VC90.CRT' VC90 = r'C:\VC90.CRT'
import sys, os, py2exe, shutil, zipfile, glob, subprocess, re import sys, os, py2exe, shutil, zipfile, glob, subprocess, re
from distutils.core import setup from distutils.core import setup

View File

@ -21,6 +21,8 @@ import mechanize
mimetypes.add_type('application/epub+zip', '.epub') mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs') mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('application/x-sony-bbeb', '.lrf') mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx') mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt') mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.4.133' __version__ = '0.4.134'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
''' '''
Various run time constants. Various run time constants.

View File

@ -467,7 +467,7 @@ class Parser(PreProcessor, LoggingInterface):
if self.htmlfile.is_binary: if self.htmlfile.is_binary:
raise ValueError('Not a valid HTML file: '+self.htmlfile.path) raise ValueError('Not a valid HTML file: '+self.htmlfile.path)
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip() src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
src = src.replace('\x00', '') src = src.replace('\x00', '').replace('\r', ' ')
src = self.preprocess(src) src = self.preprocess(src)
# lxml chokes on unicode input when it contains encoding declarations # lxml chokes on unicode input when it contains encoding declarations
for pat in ENCODING_PATS: for pat in ENCODING_PATS:

View File

@ -17,6 +17,7 @@ import types
import re import re
import copy import copy
from itertools import izip from itertools import izip
from xml.dom import SyntaxErr as CSSSyntaxError
import cssutils import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, cssproperties CSSValueList, cssproperties
@ -291,11 +292,15 @@ class Style(object):
def _apply_style_attr(self): def _apply_style_attr(self):
attrib = self._element.attrib attrib = self._element.attrib
if 'style' in attrib: if 'style' not in attrib:
css = attrib['style'].split(';') return
css = filter(None, map(lambda x: x.strip(), css)) css = attrib['style'].split(';')
css = filter(None, (x.strip() for x in css))
try:
style = CSSStyleDeclaration('; '.join(css)) style = CSSStyleDeclaration('; '.join(css))
self._style.update(self._stylizer.flatten_style(style)) except CSSSyntaxError:
return
self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self): def _has_parent(self):
return (self._element.getparent() is not None) return (self._element.getparent() is not None)

Binary file not shown.

After

Width:  |  Height:  |  Size: 983 B

View File

@ -369,13 +369,14 @@ class Main(MainWindow, Ui_MainWindow):
if r == QSystemTrayIcon.Trigger: if r == QSystemTrayIcon.Trigger:
if self.isVisible(): if self.isVisible():
for window in QApplication.topLevelWidgets(): for window in QApplication.topLevelWidgets():
if isinstance(window, (MainWindow, QDialog)): if isinstance(window, (MainWindow, QDialog)) and window.isVisible():
window.hide() window.hide()
setattr(window, '__systray_minimized', True)
else: else:
for window in QApplication.topLevelWidgets(): for window in QApplication.topLevelWidgets():
if isinstance(window, (MainWindow, QDialog)): if getattr(window, '__systray_minimized', False):
if window not in (self.device_error_dialog, self.jobs_dialog): window.show()
window.show() setattr(window, '__systray_minimized', False)
def do_default_sync(self, checked): def do_default_sync(self, checked):

View File

@ -27,6 +27,7 @@ recipe_modules = ['recipe_' + r for r in (
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english', 'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda', 'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz', 'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
'honoluluadvertiser', 'starbulletin', 'exiled',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -1,32 +1,39 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
ambito.com ambito.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Ambito(BasicNewsRecipe): class Ambito(BasicNewsRecipe):
title = 'Ambito.com' title = 'Ambito.com'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas' description = 'Informacion Libre las 24 horas'
publisher = 'Ambito.com'
category = 'news, politics, Argentina'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False encoding = 'iso-8859-1'
encoding = 'iso--8859-1'
language = _('Spanish')
cover_url = 'http://www.ambito.com/img/logo_.jpg' cover_url = 'http://www.ambito.com/img/logo_.jpg'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Argentina' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
remove_tags = [dict(name=['object','link'])]
feeds = [ feeds = [
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' ) (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' ) ,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
@ -43,3 +50,12 @@ class Ambito(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?') return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -7,25 +7,33 @@ b92.net
''' '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class B92(BasicNewsRecipe): class B92(BasicNewsRecipe):
title = u'B92' title = 'B92'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
language = _('Serbian')
description = 'Dnevne vesti iz Srbije i sveta' description = 'Dnevne vesti iz Srbije i sveta'
oldest_article = 7 oldest_article = 2
publisher = 'B92.net'
category = 'news, politics, Serbia'
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
cover_url = 'http://static.b92.net/images/fp/logo.gif' cover_url = 'http://static.b92.net/images/fp/logo.gif'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ] keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Serbia' , '--publisher', publisher
, '--publisher', 'B92'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [ feeds = [
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml') (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
@ -44,3 +52,16 @@ class B92(BasicNewsRecipe):
if biz: if biz:
nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
return nurl return nurl
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn'
soup.html['lang'] = 'sr-Latn'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(name='img',align=True):
del item['align']
item.insert(0,'<br /><br />')
return soup
language = _('Serbian')

View File

@ -5,31 +5,49 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
''' '''
blic.rs blic.rs
''' '''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe): class Blic(BasicNewsRecipe):
title = u'Blic' title = u'Blic'
__author__ = 'Darko Miletic' __author__ = u'Darko Miletic'
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
oldest_article = 7 publisher = 'RINGIER d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Serbia' , '--publisher', publisher
, '--publisher', 'Blic'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'single_news'}) ] keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
feeds = [ (u'Vesti', u'http://www.blic.rs/rssall.php')] feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
remove_tags = [dict(name=['object','link'])]
def print_version(self, url): def print_version(self, url):
start_url, question, rest_url = url.partition('?') start_url, question, rest_url = url.partition('?')
return u'http://www.blic.rs/_print.php?' + rest_url return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -1,32 +1,36 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
clarin.com clarin.com
''' '''
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Clarin(BasicNewsRecipe): class Clarin(BasicNewsRecipe):
title = 'Clarin' title = 'Clarin'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Noticias de Argentina y mundo' description = 'Noticias de Argentina y mundo'
publisher = 'Grupo Clarin'
category = 'news, politics, Argentina'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
language = _('Spanish')
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg') cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Argentina' , '--publisher', publisher
, '--publisher', 'Grupo Clarin'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [ remove_tags = [
dict(name='a' , attrs={'class':'Imp' }) dict(name='a' , attrs={'class':'Imp' })
,dict(name='div' , attrs={'class':'Perma' }) ,dict(name='div' , attrs={'class':'Perma' })
@ -49,3 +53,12 @@ class Clarin(BasicNewsRecipe):
rest = artl.partition('-0')[-1] rest = artl.partition('-0')[-1]
lmain = rest.partition('.')[0] lmain = rest.partition('.')[0]
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -5,37 +5,47 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
''' '''
danas.rs danas.rs
''' '''
import string,re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe): class Danas(BasicNewsRecipe):
title = 'Danas' title = u'Danas'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.' description = 'Vesti'
publisher = 'Danas d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = False
remove_javascript = True
use_embedded_content = False use_embedded_content = False
cover_url = 'http://www.danas.rs/images/basic/danas.gif' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Serbia' , '--publisher', publisher
, '--publisher', 'Danas'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'id':'left'}) ] keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':'width_1_4' }) dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
,dict(name='div', attrs={'class':'metaClanka' }) ,dict(name='div', attrs={'id':'comments'})
,dict(name='div', attrs={'id':'comments' }) ,dict(name=['object','link'])
,dict(name='div', attrs={'class':'baner' })
,dict(name='div', attrs={'class':'slikaClanka'})
] ]
feeds = [(u'Vesti', u'http://www.danas.rs/rss/rss.asp')] feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def print_version(self, url): def preprocess_html(self, soup):
return url + '&action=print' mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe): class DeStandaard(BasicNewsRecipe):
title = u'De Standaard' title = u'De Standaard'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
language = _('French') language = _('Dutch')
description = u'News from Belgium' description = u'News from Belgium'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -13,6 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
description = u'News from Belgium' description = u'News from Belgium'
oldest_article = 7 oldest_article = 7
language = _('Dutch')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False

View File

@ -5,32 +5,37 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
emol.com emol.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElMercurio(BasicNewsRecipe): class ElMercurio(BasicNewsRecipe):
title = 'El Mercurio online' title = 'El Mercurio online'
language = _('Spanish')
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile' description = 'El sitio de noticias online de Chile'
publisher = 'El Mercurio'
category = 'news, politics, Chile'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif' cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Chile' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class':'despliegue-txt_750px'}) dict(name='div', attrs={'class':'despliegue-txt_750px'})
,dict(name='div', attrs={'id':'div_cuerpo_participa'}) ,dict(name='div', attrs={'id':'div_cuerpo_participa'})
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'}) dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']}) ,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
@ -46,3 +51,11 @@ class ElMercurio(BasicNewsRecipe):
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7') ,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
] ]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
elargentino.com elargentino.com
''' '''
@ -12,20 +12,24 @@ class ElArgentino(BasicNewsRecipe):
title = 'ElArgentino.com' title = 'ElArgentino.com'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas' description = 'Informacion Libre las 24 horas'
language = _('Spanish') publisher = 'ElArgentino.com'
category = 'news, politics, Argentina'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png' cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Argentina' , '--category', category
, '--publisher' , 'ElArgentino.com' , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':'noprint' }) dict(name='div', attrs={'id':'noprint' })
,dict(name='div', attrs={'class':'encabezadoImprimir'}) ,dict(name='div', attrs={'class':'encabezadoImprimir'})
@ -50,7 +54,10 @@ class ElArgentino(BasicNewsRecipe):
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
soup.prettify() for item in soup.findAll(style=True):
del item['style']
return soup return soup
language = _('Spanish')

View File

@ -12,35 +12,49 @@ class ElMundo(BasicNewsRecipe):
title = 'El Mundo' title = 'El Mundo'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'News from Spain' description = 'News from Spain'
language = _('Spanish') publisher = 'El Mundo'
category = 'news, politics, Spain'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'iso8859_15' encoding = 'iso8859_15'
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif' cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Spain' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
keep_only_tags = [dict(name='div', attrs={'class':'noticia'})] html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [
dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
,dict(name='div', attrs={'class':['contenido_noticia_01']})
]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':['herramientas','publicidad_google','video','herramientasarriba','contenido_noticia_02']}) dict(name='div', attrs={'class':['herramientas','publicidad_google']})
,dict(name='div', attrs={'id':'modulo_multimedia' }) ,dict(name='div', attrs={'id':'modulo_multimedia' })
,dict(name=['object','script','link', 'a']) ,dict(name='ul', attrs={'class':'herramientas' })
,dict(name='ul', attrs={'class':'herramientas'}) ,dict(name=['object','link'])
] ]
feeds = [ feeds = [
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' ) (u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' ) ,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' ) ,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' ) ,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' ) ,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26') ,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
] ]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -8,25 +8,28 @@ estadao.com.br
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Estadao(BasicNewsRecipe): class Estadao(BasicNewsRecipe):
title = 'O Estado de S. Paulo' title = 'O Estado de S. Paulo'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'News from Brasil' description = 'News from Brasil in Portugese'
language = _('Spanish') publisher = 'O Estado de S. Paulo'
category = 'news, politics, Brasil'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
cover_url = 'http://www.estadao.com.br/img/logo_estadao.png' cover_url = 'http://www.estadao.com.br/img/logo_estadao.png'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Brasil' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'c1'})] keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
remove_tags = [ remove_tags = [
@ -52,4 +55,8 @@ class Estadao(BasicNewsRecipe):
ifr = soup.find('iframe') ifr = soup.find('iframe')
if ifr: if ifr:
ifr.extract() ifr.extract()
for item in soup.findAll(style=True):
del item['style']
return soup return soup
language = _('Portugese')

View File

@ -0,0 +1,51 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
exiledonline.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Exiled(BasicNewsRecipe):
    """News recipe for exiledonline.com, fed from the site's single RSS feed."""

    # --- descriptive metadata -------------------------------------------
    title = 'Exiled Online'
    __author__ = 'Darko Miletic'
    publisher = 'Exiled Online'
    description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
    category = 'news, politics, international'
    language = _('English')
    cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'

    # --- fetch options read by BasicNewsRecipe --------------------------
    oldest_article = 15
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf8'
    no_stylesheets = True
    remove_javascript = True

    # --- converter options, all derived from the metadata above ---------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]
    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"' % (
        publisher, description, category)

    # --- page clean-up rules --------------------------------------------
    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='div', attrs={'class': 'info'}),
        dict(name='div', attrs={'id': ['comments', 'navig']}),
    ]

    feeds = [(u'Articles', u'http://exiledonline.com/feed/')]

    def preprocess_html(self, soup):
        """Drop inline ``style`` attributes and prepend meta tags to <head>.

        Every element carrying a ``style`` attribute has it removed, then a
        Content-Language (en) / Content-Type (utf-8) meta block is inserted
        as the first child of ``soup.head``. The same soup is returned.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        meta_block = (
            '\n<meta http-equiv="Content-Language" content="en"/>'
            '\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
        )
        soup.head.insert(0, meta_block)
        return soup

View File

@ -7,37 +7,46 @@ granma.cubaweb.cu
''' '''
import urllib import urllib
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Granma(BasicNewsRecipe): class Granma(BasicNewsRecipe):
title = 'Diario Granma' title = 'Diario Granma'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
language = _('Spanish')
description = 'Organo oficial del Comite Central del Partido Comunista de Cuba' description = 'Organo oficial del Comite Central del Partido Comunista de Cuba'
publisher = 'Granma'
category = 'news, politics, Cuba'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg' cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Cuba' , '--category', category
, '--publisher' , title , '--publisher', publisher
, '--ignore-tables' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='table', attrs={'height':'466'})] keep_only_tags = [dict(name='table', attrs={'height':'466'})]
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )] feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
def preprocess_html(self, soup): def preprocess_html(self, soup):
del soup.body.table['style'] mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
rtag = soup.find('td', attrs={'height':'458'}) soup.head.insert(0,mtag)
if rtag: for item in soup.findAll('table'):
del rtag['style'] if item.has_key('width'):
del item['width']
if item.has_key('height'):
del item['height']
for item in soup.findAll(style=True):
del item['style']
return soup return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
harpers.org - paid subscription/ printed issue articles harpers.org - paid subscription/ printed issue articles
This recipe only gets articles published in text format
@ -9,13 +9,15 @@ images and pdf's are ignored
''' '''
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Harpers_full(BasicNewsRecipe): class Harpers_full(BasicNewsRecipe):
title = u"Harper's Magazine - articles from printed edition" title = u"Harper's Magazine - articles from printed edition"
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
description = u"Harper's Magazine: Founded June 1850." description = u"Harper's Magazine: Founded June 1850."
language = _('English') publisher = "Harpers's"
category = 'news, politics, USA'
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
@ -26,6 +28,15 @@ class Harpers_full(BasicNewsRecipe):
INDEX = strftime('http://www.harpers.org/archive/%Y/%m') INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
LOGIN = 'http://www.harpers.org' LOGIN = 'http://www.harpers.org'
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif') cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ] keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
remove_tags = [ remove_tags = [
@ -60,3 +71,10 @@ class Harpers_full(BasicNewsRecipe):
,'description':'' ,'description':''
}) })
return [(soup.head.title.string, articles)] return [(soup.head.title.string, articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('English')

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
honoluluadvertiser.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Honoluluadvertiser(BasicNewsRecipe):
    """News recipe for honoluluadvertiser.com.

    Articles come from the site's RSS feeds and are fetched through the
    printer-friendly template (see ``print_version``).
    """

    title = 'Honolulu Advertiser'
    __author__ = 'Darko Miletic'
    description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
    publisher = 'Honolulu Advertiser'
    category = 'news, Honolulu, Hawaii'
    # Declared explicitly like the other recipes; it matches the
    # Content-Language value written by preprocess_html below.
    language = _('English')
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    remove_javascript = True
    cover_url = 'http://www.honoluluadvertiser.com/graphics/branding.gif'

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [dict(name='td')]

    remove_tags = [dict(name=['object', 'link'])]

    feeds = [
        (u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML'),
        (u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML'),
        (u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML'),
        (u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML'),
        (u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML'),
        (u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML'),
    ]

    def preprocess_html(self, soup):
        """Remove inline ``style`` attributes and add a language meta tag.

        The Content-Language (en) meta tag is inserted as the first child of
        ``soup.head``; the same soup object is returned.
        """
        for item in soup.findAll(style=True):
            del item['style']
        mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
        soup.head.insert(0, mtag)
        return soup

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        The article id is the text between ``/article/`` and the last
        ``/-1/`` in ``url``. If either marker is missing no id can be
        extracted; ``url`` is then returned unchanged rather than a print
        URL with an empty ``AID``.
        """
        ubody = url.rpartition('/-1/')[0]
        article_id = ubody.partition('/article/')[2]
        if not article_id:
            # Link is not in the expected form; keep the original address.
            return url
        return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
infobae.com infobae.com
''' '''
@ -12,21 +12,23 @@ class Infobae(BasicNewsRecipe):
title = 'Infobae.com' title = 'Infobae.com'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas' description = 'Informacion Libre las 24 horas'
publisher = 'Infobae.com'
category = 'news, politics, Argentina'
oldest_article = 2 oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'iso-8859-1' encoding = 'iso-8859-1'
cover_url = 'http://www.infobae.com/imgs/header/header.gif' cover_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Argentina' , '--category', category
, '--publisher' , 'Infobae.com' , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [ feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
@ -39,3 +41,12 @@ class Infobae(BasicNewsRecipe):
main, sep, article_part = url.partition('contenidos/') main, sep, article_part = url.partition('contenidos/')
article_id, rsep, rrest = article_part.partition('-') article_id, rsep, rrest = article_part.partition('-')
return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -12,20 +12,24 @@ class JBOnline(BasicNewsRecipe):
title = 'Jornal Brasileiro Online' title = 'Jornal Brasileiro Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'News from Brasil' description = 'News from Brasil'
publisher = 'Jornal Brasileiro'
category = 'news, politics, Brasil'
oldest_article = 2 oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif' cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Brasil' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})] keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})]
remove_tags = [dict(name=['script','object','form'])] remove_tags = [dict(name=['script','object','form'])]
@ -36,7 +40,8 @@ class JBOnline(BasicNewsRecipe):
ifr = soup.find('iframe') ifr = soup.find('iframe')
if ifr: if ifr:
ifr.extract() ifr.extract()
item = soup.find('div', attrs={'id':'corpoNoticia'}) for item in soup.findAll(style=True):
if item: del item['style']
del item['style']
return soup return soup
language = _('Portugese')

View File

@ -6,28 +6,35 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
jutarnji.hr jutarnji.hr
''' '''
import string, re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Jutarnji(BasicNewsRecipe): class Jutarnji(BasicNewsRecipe):
title = 'Jutarnji' title = u'Jutarnji'
__author__ = 'Darko Miletic' __author__ = u'Darko Miletic'
description = 'Online izdanje Jutarnjeg lista' description = u'Hrvatski portal'
publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
simultaneous_downloads = 1 simultaneous_downloads = 1
delay = 1 delay = 1
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True
encoding = 'cp1250' encoding = 'cp1250'
cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Croatia' , '--publisher', publisher
, '--publisher', 'Europapress holding d.o.o.'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [ remove_tags = [
@ -49,11 +56,16 @@ class Jutarnji(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
main, split, rest = url.partition('.jl') main, split, rest = url.partition('.jl')
rmain, rsplit, rrest = main.rpartition(',') rmain, rsplit, rrest = main.rpartition(',')
return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
soup.prettify() mtag = '<meta http-equiv="Content-Language" content="hr"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup return soup

View File

@ -13,21 +13,25 @@ class Juventudrebelde(BasicNewsRecipe):
title = 'Juventud Rebelde' title = 'Juventud Rebelde'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Diario de la Juventud Cubana' description = 'Diario de la Juventud Cubana'
publisher = 'Juventud rebelde'
category = 'news, politics, Cuba'
oldest_article = 2 oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg') cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Cuba' , '--category', category
, '--publisher' , title , '--publisher', publisher
, '--ignore-tables' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})] keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
feeds = [ feeds = [
@ -40,4 +44,11 @@ class Juventudrebelde(BasicNewsRecipe):
,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' ) ,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' )
] ]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -5,7 +5,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
''' '''
juventudrebelde.co.cu juventudrebelde.co.cu
''' '''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -13,22 +12,33 @@ class Juventudrebelde_english(BasicNewsRecipe):
title = 'Juventud Rebelde in english' title = 'Juventud Rebelde in english'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'The newspaper of Cuban Youth' description = 'The newspaper of Cuban Youth'
language = _('English') publisher = 'Juventud Rebelde'
category = 'news, politics, Cuba'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'iso-8859-1' encoding = 'iso-8859-1'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Cuba' , '--category', category
, '--publisher' , title , '--publisher', publisher
, '--ignore-tables' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'read'})] keep_only_tags = [dict(name='div', attrs={'class':'read'})]
feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )] feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('English')

View File

@ -11,25 +11,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaCuarta(BasicNewsRecipe): class LaCuarta(BasicNewsRecipe):
title = 'La Cuarta' title = 'La Cuarta'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile' description = 'La Cuarta Cibernetica: El Diario popular'
publisher = 'CODISA, Consorcio Digital S.A.'
category = 'news, politics, entertainment, Chile'
oldest_article = 2 oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Chile' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ] keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ]
remove_tags = [ remove_tags = [
dict(name='script') dict(name='ul')
,dict(name='ul')
,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']}) ,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']})
,dict(name='div', attrs={'class':['par ad-1','par ad-2']}) ,dict(name='div', attrs={'class':['par ad-1','par ad-2']})
,dict(name='input') ,dict(name='input')
@ -37,7 +40,14 @@ class LaCuarta(BasicNewsRecipe):
,dict(name='strong', text='PUBLICIDAD') ,dict(name='strong', text='PUBLICIDAD')
] ]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')] feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
language = _('Spanish')

View File

@ -12,21 +12,24 @@ class LaSegunda(BasicNewsRecipe):
title = 'La Segunda' title = 'La Segunda'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile' description = 'El sitio de noticias online de Chile'
language = _('Spanish') publisher = 'La Segunda'
category = 'news, politics, Chile'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif' cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Chile' , '--category', category
, '--publisher' , title , '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='table')] keep_only_tags = [dict(name='table')]
feeds = [ feeds = [
@ -46,3 +49,13 @@ class LaSegunda(BasicNewsRecipe):
rest, sep, article_id = url.partition('index.asp?idnoticia=') rest, sep, article_id = url.partition('index.asp?idnoticia=')
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(name='table', width=True):
del item['width']
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -12,20 +12,24 @@ class LaTercera(BasicNewsRecipe):
title = 'La Tercera' title = 'La Tercera'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile' description = 'El sitio de noticias online de Chile'
publisher = 'La Tercera'
category = 'news, politics, Chile'
oldest_article = 2 oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Chile' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
keep_only_tags = [dict(name='div', attrs={'class':'span-16 articulo border'}) ] html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
remove_tags = [ remove_tags = [
dict(name='script') dict(name='script')
@ -50,4 +54,11 @@ class LaTercera(BasicNewsRecipe):
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657') ,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
] ]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
lanacion.com.ar lanacion.com.ar
''' '''
@ -11,20 +11,23 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Lanacion(BasicNewsRecipe): class Lanacion(BasicNewsRecipe):
title = 'La Nacion' title = 'La Nacion'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!' description = 'Noticias de Argentina y el resto del mundo'
publisher = 'La Nacion'
category = 'news, politics, Argentina'
oldest_article = 2 oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_javascript = True
no_stylesheets = True
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Argentina' , '--publisher', publisher
, '--publisher', 'La Nacion SA'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})] keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
remove_tags = [ remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' }) dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
@ -47,11 +50,11 @@ class Lanacion(BasicNewsRecipe):
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' ) ,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
] ]
def get_cover_url(self): def preprocess_html(self, soup):
index = 'http://www.lanacion.com.ar' mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
cover_url = None soup.head.insert(0,mtag)
soup = self.index_to_soup(index) for item in soup.findAll(style=True):
cover_item = soup.find('img',attrs={'class':'logo'}) del item['style']
if cover_item: return soup
cover_url = index + cover_item['src']
return cover_url language = _('Spanish')

View File

@ -13,20 +13,24 @@ class LaNacionChile(BasicNewsRecipe):
title = 'La Nacion Chile' title = 'La Nacion Chile'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile' description = 'El sitio de noticias online de Chile'
publisher = 'La Nacion'
category = 'news, politics, Chile'
oldest_article = 2 oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif' cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Chile' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'bloque'})] keep_only_tags = [dict(name='div', attrs={'class':'bloque'})]
feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')] feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
@ -41,5 +45,10 @@ class LaNacionChile(BasicNewsRecipe):
item = soup.find('a', attrs={'href':'javascript:window.close()'}) item = soup.find('a', attrs={'href':'javascript:window.close()'})
if item: if item:
item.extract() item.extract()
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
laprensa.com.ar laprensa.com.ar
''' '''
@ -13,20 +13,24 @@ class LaPrensa(BasicNewsRecipe):
title = 'La Prensa' title = 'La Prensa'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas' description = 'Informacion Libre las 24 horas'
publisher = 'La Prensa'
category = 'news, politics, Argentina'
oldest_article = 7 oldest_article = 7
language = _('Spanish')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif' cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Argentina' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [ feeds = [
(u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' ) (u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
,(u'Economia' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' ) ,(u'Economia' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' )
@ -47,5 +51,10 @@ class LaPrensa(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
del soup.body['onload'] del soup.body['onload']
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup return soup
language = _('Spanish')

View File

@ -7,12 +7,15 @@ nin.co.yu
''' '''
import re, urllib import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe): class Nin(BasicNewsRecipe):
title = 'NIN online' title = 'NIN online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Nedeljne informativne novine' description = 'Nedeljne informativne novine'
publisher = 'NIN'
category = 'news, politics, Serbia'
no_stylesheets = True no_stylesheets = True
oldest_article = 15 oldest_article = 15
simultaneous_downloads = 1 simultaneous_downloads = 1
@ -22,12 +25,18 @@ class Nin(BasicNewsRecipe):
PREFIX = 'http://www.nin.co.yu' PREFIX = 'http://www.nin.co.yu'
INDEX = PREFIX + '/?change_lang=ls' INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true' LOGIN = PREFIX + '/?logout=true'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, politics, Serbia' , '--category', category
, '--publisher' , 'NIN' , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
def get_browser(self): def get_browser(self):
@ -53,3 +62,12 @@ class Nin(BasicNewsRecipe):
if link_item: if link_item:
cover_url = self.PREFIX + link_item['src'] cover_url = self.PREFIX + link_item['src']
return cover_url return cover_url
def preprocess_html(self, soup):
    '''
    Add a Content-Language hint (sr-Latn-RS) and strip inline styles.

    :param soup: parsed page; only ``soup.head`` and ``soup.findAll`` are used.
    :return: the same ``soup`` object, modified in place.
    '''
    mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
    head = soup.head
    # A page without a <head> would raise AttributeError on insert();
    # skip the language hint for such pages instead of failing.
    if head is not None:
        head.insert(0, mtag)
    for item in soup.findAll(style=True):
        del item['style']
    return soup
language = _('Serbian')

View File

@ -5,31 +5,45 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
''' '''
novosti.rs novosti.rs
''' '''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Novosti(BasicNewsRecipe): class Novosti(BasicNewsRecipe):
title = 'Vecernje Novosti' title = u'Vecernje Novosti'
__author__ = 'Darko Miletic' __author__ = u'Darko Miletic'
description = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti' description = u'Vesti'
publisher = 'Kompanija Novosti'
category = 'news, politics, Serbia'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Serbia' , '--publisher', publisher
, '--publisher', 'Novosti AD'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'jednaVest'}) ] keep_only_tags = [dict(name='div', attrs={'class':'jednaVest'})]
remove_tags_after = dict(name='div', attrs={'class':'info_bottom'}) remove_tags = [dict(name='div', attrs={'class':['info','info_bottom','clip_div']})]
remove_tags = [
dict(name='div', attrs={'class':'info'})
,dict(name='div', attrs={'class':'info_bottom'})
]
feeds = [ (u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup):
    '''
    Add a Content-Language hint (sr-Latn) and strip inline styles.

    :param soup: parsed page; only ``soup.head`` and ``soup.findAll`` are used.
    :return: the same ``soup`` object, modified in place.
    '''
    mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
    head = soup.head
    # A page without a <head> would raise AttributeError on insert();
    # skip the language hint for such pages instead of failing.
    if head is not None:
        head.insert(0, mtag)
    for item in soup.findAll(style=True):
        del item['style']
    return soup
language = _('Serbian')

View File

@ -6,35 +6,55 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
nspm.rs nspm.rs
''' '''
import string,re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Nspm(BasicNewsRecipe): class Nspm(BasicNewsRecipe):
title = u'Nova srpska politicka misao' title = u'Nova srpska politicka misao'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Casopis za politicku teoriju i drustvena istrazivanja' description = 'Casopis za politicku teoriju i drustvena istrazivanja'
publisher = 'NSPM'
category = 'news, politics, Serbia'
oldest_article = 7 oldest_article = 7
language = _('Serbian')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l' INDEX = 'http://www.nspm.rs/?alphabet=l'
cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg' encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, politics, Serbia' , '--publisher', publisher
, '--publisher', 'IIC NSPM' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name='a')]
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
br.open(self.INDEX) br.open(self.INDEX)
return br return br
feeds = [ (u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')] feeds = [(u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')]
def print_version(self, url): def print_version(self, url):
return url.replace('.html','/stampa.html') return url.replace('.html','/stampa.html')
def preprocess_html(self, soup):
    '''
    Mark the document as sr-Latn-RS and remove inline styles.

    Sets ``xml:lang`` and ``lang`` on the <html> element, rewrites an
    existing Content-Language <meta> if one is found, then deletes every
    ``style`` attribute. Returns the same ``soup`` object.
    '''
    lang_code = 'sr-Latn-RS'
    for attr_name in ('xml:lang', 'lang'):
        soup.html[attr_name] = lang_code
    meta_lang = soup.find('meta', attrs={'http-equiv': 'Content-Language'})
    if meta_lang:
        meta_lang['content'] = lang_code
    styled_tags = soup.findAll(style=True)
    for tag in styled_tags:
        del tag['style']
    return soup
language = _('Serbian')

View File

@ -12,20 +12,24 @@ class OGlobo(BasicNewsRecipe):
title = 'O Globo' title = 'O Globo'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'News from Brasil' description = 'News from Brasil'
publisher = 'O Globo'
category = 'news, politics, Brasil'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
language = _('Spanish')
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://oglobo.globo.com/_img/o-globo.png' cover_url = 'http://oglobo.globo.com/_img/o-globo.png'
remove_javascript = True
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Brasil' , '--category', category
, '--publisher' , title , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})] keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})]
remove_tags = [ remove_tags = [
@ -56,3 +60,10 @@ class OGlobo(BasicNewsRecipe):
,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml') ,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml') ,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
] ]
def preprocess_html(self, soup):
    '''
    Delete the ``style`` attribute from every element that has one.

    Returns the same ``soup`` object, modified in place.
    '''
    styled_tags = soup.findAll(style=True)
    for tag in styled_tags:
        del tag['style']
    return soup
language = _('Portugese')

View File

@ -1,32 +1,37 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
pagina12.com.ar pagina12.com.ar
''' '''
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe): class Pagina12(BasicNewsRecipe):
title = u'Pagina/12' title = u'Pagina/12'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo' description = 'Noticias de Argentina y el resto del mundo'
language = _('Spanish') publisher = 'La Pagina S.A.'
category = 'news, politics, Argentina'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg') cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
remove_javascript = True
use_embedded_content = False
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment', description
, '--category' , 'news, Argentina' , '--category', category
, '--publisher' , 'La Pagina S.A.' , '--publisher', publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':'volver'}) dict(name='div', attrs={'id':'volver'})
@ -38,3 +43,12 @@ class Pagina12(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/') return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
def preprocess_html(self, soup):
    '''
    Add a Content-Language hint (es-AR) and strip inline styles.

    :param soup: parsed page; only ``soup.head`` and ``soup.findAll`` are used.
    :return: the same ``soup`` object, modified in place.
    '''
    mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
    head = soup.head
    # A page without a <head> would raise AttributeError on insert();
    # skip the language hint for such pages instead of failing.
    if head is not None:
        head.insert(0, mtag)
    for item in soup.findAll(style=True):
        del item['style']
    return soup
language = _('Spanish')

View File

@ -6,30 +6,53 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
pescanik.net pescanik.net
''' '''
import string,re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe): class Pescanik(BasicNewsRecipe):
title = 'Pescanik' title = 'Pescanik'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Pescanik' description = 'Pescanik'
publisher = 'Pescanik'
category = 'news, politics, Serbia'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
html2lrf_options = ['--base-font-size', '10'] remove_javascript = True
html2epub_options = 'base_font_size = "10pt"' encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png" cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags_after = dict(name='div', attrs={'class':'article_seperator'}) remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'})
remove_tags = [dict(name='td' , attrs={'class':'buttonheading'})] ,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link'])
]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')] feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
def print_version(self, url): def print_version(self, url):
nurl = url.replace('http://pescanik.net/index.php','http://pescanik.net/index2.php') nurl = url.replace('/index.php','/index2.php')
return nurl + '&pop=1&page=0' return nurl + '&pop=1&page=0'
def preprocess_html(self, soup):
    '''
    Add a Content-Language hint (sr-Latn-RS) and strip inline styles.

    :param soup: parsed page; only ``soup.head`` and ``soup.findAll`` are used.
    :return: the same ``soup`` object, modified in place.
    '''
    mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
    head = soup.head
    # A page without a <head> would raise AttributeError on insert();
    # skip the language hint for such pages instead of failing.
    if head is not None:
        head.insert(0, mtag)
    for item in soup.findAll(style=True):
        del item['style']
    return soup
language = _('Serbian')

View File

@ -5,37 +5,61 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
''' '''
politika.rs politika.rs
''' '''
import string,re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Politika(BasicNewsRecipe): class Politika(BasicNewsRecipe):
title = 'Politika Online' title = u'Politika Online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Najstariji dnevni list na Balkanu' description = 'Najstariji dnevni list na Balkanu'
publisher = 'Politika novine i Magazini d.o.o'
category = 'news, politics, Serbia'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
extra_css = '.content_center_border {text-align: left;}'
use_embedded_content = False use_embedded_content = False
cover_url = 'http://www.politika.rs:8080/images/politika.gif' remove_javascript = True
encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, Serbia' , '--publisher', publisher
, '--publisher', 'POLITIKA NOVINE I MAGAZINI d.o.o.'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'contentcenter'}) ] keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
remove_tags_after = dict(name='div', attrs={'class':'datum_item_details'})
remove_tags = [
dict(name='div', attrs={'class':['send_print','txt-komentar']})
,dict(name=['object','link','a'])
,dict(name='h1', attrs={'class':'box_header-tags'})
]
feeds = [ feeds = [
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' ) (u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' ) ,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' ) ,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' ) ,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' ) ,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' ) ,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' ) ,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
] ]
def preprocess_html(self, soup):
    '''
    Add a Content-Language hint (sr-Latn-RS), strip inline styles and
    left-align the main content container.

    :param soup: parsed page; ``soup.head``, ``soup.findAll`` and
                 ``soup.find`` are used.
    :return: the same ``soup`` object, modified in place.
    '''
    mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
    head = soup.head
    # A page without a <head> would raise AttributeError on insert();
    # skip the language hint for such pages instead of failing.
    if head is not None:
        head.insert(0, mtag)
    for item in soup.findAll(style=True):
        del item['style']
    # Same container class that keep_only_tags selects for this recipe.
    ftag = soup.find('div', attrs={'class':'content_center_border'})
    if ftag:
        ftag['align'] = 'left'
    return soup

View File

@ -17,7 +17,7 @@ class PetersburgTimes(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = _('Russian') language = _('English')
INDEX = 'http://www.sptimes.ru' INDEX = 'http://www.sptimes.ru'
def parse_index(self): def parse_index(self):

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
starbulletin.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Starbulletin(BasicNewsRecipe):
    '''
    News recipe for the Honolulu Star-Bulletin (starbulletin.com), built
    from the site's RSS feeds.
    '''
    title                 = 'Honolulu Star-Bulletin'
    __author__            = 'Darko Miletic'
    description           = "Latest national and local Hawaii sports news"
    publisher             = 'Honolulu Star-Bulletin'
    category              = 'news, Honolulu, Hawaii'
    # Declared explicitly like the sibling recipes; matches the "en"
    # Content-Language hint inserted by preprocess_html below.
    language              = _('English')
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
    remove_javascript     = True
    cover_url             = 'http://media.starbulletin.com/designimages/spacer.gif'

    # Metadata forwarded to the LRF converter as command-line style options.
    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    # Same metadata for the EPUB converter, as newline-separated key="value" pairs.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    # Only the story column is kept ('storyColoumn' is spelled as in the source).
    keep_only_tags = [ dict(name='div', attrs={'id':'storyColoumn'}) ]

    remove_tags = [
                     dict(name=['object','link'])
                    ,dict(name='span', attrs={'id':'printdesc'})
                    ,dict(name='div' , attrs={'class':'lightGreyBox storyTools clearAll'})
                    ,dict(name='div' , attrs={'id':'breadcrumbs'})
                  ]

    feeds = [
              (u'Headlines' , u'http://www.starbulletin.com/starbulletin_headlines.rss' )
             ,(u'News'      , u'http://www.starbulletin.com/news/index.rss'             )
             ,(u'Sports'    , u'http://www.starbulletin.com/sports/index.rss'           )
             ,(u'Features'  , u'http://www.starbulletin.com/features/index.rss'         )
             ,(u'Editorials', u'http://www.starbulletin.com/editorials/index.rss'       )
             ,(u'Business'  , u'http://www.starbulletin.com/business/index.rss'         )
             ,(u'Travel'    , u'http://www.starbulletin.com/travel/index.rss'           )
            ]

    def preprocess_html(self, soup):
        '''
        Strip inline styles and add a Content-Language hint (en).

        :param soup: parsed page; only ``soup.findAll`` and ``soup.head``
                     are used.
        :return: the same ``soup`` object, modified in place.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
        head = soup.head
        # A page without a <head> would raise AttributeError on insert();
        # skip the language hint for such pages instead of failing.
        if head is not None:
            head.insert(0, mtag)
        return soup

View File

@ -1,13 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
vijesti.cg.yu vijesti.cg.yu
''' '''
import string,re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -15,23 +15,35 @@ class Vijesti(BasicNewsRecipe):
title = 'Vijesti' title = 'Vijesti'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'News from Montenegro' description = 'News from Montenegro'
oldest_article = 2 publisher = 'Daily Press Vijesti'
category = 'news, politics, Montenegro'
oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False remove_javascript = True
encoding = 'cp1250' encoding = 'cp1250'
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif' cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Montenegro'
, '--publisher' , 'Daily Press Vijesti'
]
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})] keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
remove_tags = [
dict(name='div', attrs={'align':'right'})
,dict(name=['object','link'])
]
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )] feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -39,4 +51,10 @@ class Vijesti(BasicNewsRecipe):
soup.html['lang'] = 'sr-Latn-ME' soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>' mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
for item in soup.findAll('img'):
if item.has_key('align'):
del item['align']
item.insert(0,'<br /><br />')
return soup return soup
language = _('Serbian')

View File

@ -6,26 +6,34 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
vreme.com vreme.com
''' '''
import string,re import re
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Vreme(BasicNewsRecipe): class Vreme(BasicNewsRecipe):
title = 'Vreme'
title = 'Vreme' __author__ = 'Darko Miletic'
__author__ = 'Darko Miletic' description = 'Politicki Nedeljnik Srbije'
description = 'Politicki Nedeljnik Srbije' publisher = 'Vreme d.o.o.'
category = 'news, politics, Serbia'
no_stylesheets = True no_stylesheets = True
remove_javascript = True
needs_subscription = True needs_subscription = True
INDEX = 'http://www.vreme.com' INDEX = 'http://www.vreme.com'
LOGIN = 'http://www.vreme.com/account/index.php' LOGIN = 'http://www.vreme.com/account/index.php'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
, '--base-font-size', '10' , '--category', category
, '--category', 'news, politics, Serbia' , '--publisher', publisher
, '--publisher', 'Vreme d.o.o.'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
def get_browser(self): def get_browser(self):
@ -67,9 +75,28 @@ class Vreme(BasicNewsRecipe):
}) })
return [(soup.head.title.string, articles)] return [(soup.head.title.string, articles)]
remove_tags = [
dict(name=['object','link'])
,dict(name='table',attrs={'xclass':'image'})
]
def print_version(self, url): def print_version(self, url):
return url + '&print=yes' return url + '&print=yes'
def preprocess_html(self, soup):
    '''
    Clean up the print version of an article.

    Removes the ``text``, ``bgcolor`` and ``onload`` attributes from
    <body>, adds a Content-Language hint (sr-Latn), and, when a <td> is
    found, detaches it together with the body's table and re-inserts
    the <td> at the start of <body>.

    :param soup: parsed page.
    :return: the same ``soup`` object, modified in place.
    '''
    del soup.body['text'   ]
    del soup.body['bgcolor']
    del soup.body['onload' ]
    mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
    soup.head.insert(0,mtag)
    tbl  = soup.body.table
    tbbb = soup.find('td')
    if tbbb:
        tbbb.extract()
        # find('td') searches the whole document while the table lookup is
        # limited to <body>, so a <td> can be found with no body table;
        # calling extract() on None would raise AttributeError.
        if tbl is not None:
            tbl.extract()
        soup.body.insert(0,tbbb)
    return soup
def get_cover_url(self): def get_cover_url(self):
cover_url = None cover_url = None
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.INDEX)
@ -77,3 +104,5 @@ class Vreme(BasicNewsRecipe):
if cover_item: if cover_item:
cover_url = self.INDEX + cover_item['src'] cover_url = self.INDEX + cover_item['src']
return cover_url return cover_url
language = _('Serbian')

View File

@ -410,6 +410,7 @@ class RecursiveFetcher(object, LoggingInterface):
_fname.decode('latin1', 'replace') _fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '') _fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
_fname = sanitize_file_name(_fname) _fname = sanitize_file_name(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml'
res = os.path.join(linkdiskpath, _fname) res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res) self.downloaded_paths.append(res)
self.filemap[nurl] = res self.filemap[nurl] = res

View File

@ -77,7 +77,7 @@ def run_windows_install_jammer(installer):
def build_windows(shutdown=True): def build_windows(shutdown=True):
installer = installer_name('exe') installer = installer_name('exe')
vm = '/vmware/Windows XP/Windows XP Professional.vmx' vm = '/mnt/backup/calibre_windows_xp_home/calibre_windows_xp_home.vmx'
start_vm(vm, 'windows', BUILD_SCRIPT%('python setup.py develop', 'python','installer\\\\windows\\\\freeze.py')) start_vm(vm, 'windows', BUILD_SCRIPT%('python setup.py develop', 'python','installer\\\\windows\\\\freeze.py'))
if os.path.exists('build/py2exe'): if os.path.exists('build/py2exe'):
shutil.rmtree('build/py2exe') shutil.rmtree('build/py2exe')