mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
cbdf0ea09e
@ -299,7 +299,6 @@ File ::2BCD9281-2CBC-CF0D-0E12-2CE11F6ED758 -name comic2epub.exe.local -parent 8
|
|||||||
File ::EDE6F457-C83F-C5FA-9AF4-38FDFF17D929 -name PIL._imagingtk.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::EDE6F457-C83F-C5FA-9AF4-38FDFF17D929 -name PIL._imagingtk.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::09D0906E-3611-3DB7-32CF-A140585694A7 -name win32pdh.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::09D0906E-3611-3DB7-32CF-A140585694A7 -name win32pdh.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::4C84F0DC-7157-0C90-2062-180139B03E25 -name IM_MOD_RL_rgb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::4C84F0DC-7157-0C90-2062-180139B03E25 -name IM_MOD_RL_rgb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::F402F507-87C5-BDB1-80AE-AD3FF4A4BCE7 -name bzrlib._patiencediff_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
|
||||||
File ::A732EDE7-4796-241F-BECA-68E59F88F8AF -name lrs2lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::A732EDE7-4796-241F-BECA-68E59F88F8AF -name lrs2lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::69072379-7D16-B9F7-9F39-3E6403C48267 -name IM_MOD_RL_xbm_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::69072379-7D16-B9F7-9F39-3E6403C48267 -name IM_MOD_RL_xbm_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::FBD11D98-D1E7-5DD9-BF02-01CE92518859 -name IM_MOD_RL_otb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::FBD11D98-D1E7-5DD9-BF02-01CE92518859 -name IM_MOD_RL_otb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
@ -365,7 +364,6 @@ File ::26741B21-C241-E100-8BB1-8B679BC3E662 -name configure.xml -parent 8E5D85A4
|
|||||||
File ::7D491E89-C6D3-1E6E-F4BD-8E55260FE33E -name libexpat.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::7D491E89-C6D3-1E6E-F4BD-8E55260FE33E -name libexpat.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::A4910EB3-0F1C-F6F0-CD2D-16A64BBAA92B -name calibre-fontconfig.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::A4910EB3-0F1C-F6F0-CD2D-16A64BBAA92B -name calibre-fontconfig.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::8711327A-716D-B162-6AC6-2FB4AD071266 -name fb22lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::8711327A-716D-B162-6AC6-2FB4AD071266 -name fb22lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::0FDD3A7A-31F3-8089-CE32-D80EAA6F62B2 -name bzrlib._btree_serializer_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
|
||||||
File ::476CB977-5155-D56F-26CA-EB243AEBBA99 -name unrar.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::476CB977-5155-D56F-26CA-EB243AEBBA99 -name unrar.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::2DA1CC8D-AF5C-3B03-2060-301DFE0356CC -name mobi2oeb.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::2DA1CC8D-AF5C-3B03-2060-301DFE0356CC -name mobi2oeb.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::2E2A9EDA-5386-444E-8479-557386794552 -name IM_MOD_RL_uil_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::2E2A9EDA-5386-444E-8479-557386794552 -name IM_MOD_RL_uil_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
@ -487,7 +485,6 @@ File ::AA761ACD-B728-2324-AA75-B20A2A79F125 -name lrf2lrs.exe -parent 8E5D85A4-7
|
|||||||
File ::95434C76-22F5-B9CE-6194-6E1B1EE3232D -name IM_MOD_RL_info_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::95434C76-22F5-B9CE-6194-6E1B1EE3232D -name IM_MOD_RL_info_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::AAF45D03-322F-5553-63A7-312DB754A20B -name _ctypes.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::AAF45D03-322F-5553-63A7-312DB754A20B -name _ctypes.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::C3D351CA-A8D8-AB35-55D9-5AACF8DB37D1 -name python26.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::C3D351CA-A8D8-AB35-55D9-5AACF8DB37D1 -name python26.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::2F90B52F-A728-2CA4-5688-0283674695B7 -name _elementtree.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
|
||||||
File ::B50B66A1-FB65-FAD5-1DD7-E894ACC07464 -name QtSvg4.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::B50B66A1-FB65-FAD5-1DD7-E894ACC07464 -name QtSvg4.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::906FF13D-D993-7192-7EA5-6D15A5A24BFB -name CORE_RL_png_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::906FF13D-D993-7192-7EA5-6D15A5A24BFB -name CORE_RL_png_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
File ::5D368661-6BF0-D6AF-7C1A-87646864EB4B -name delegates.xml -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
File ::5D368661-6BF0-D6AF-7C1A-87646864EB4B -name delegates.xml -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40
|
||||||
@ -552,7 +549,7 @@ SetupType ::D9ADE41C-B744-690C-2CED-CF826BF03D2E -setup Install -active Yes -pla
|
|||||||
|
|
||||||
InstallComponent 3EA07B17-04D8-6508-B535-96CC7173B49A -setup Install -type pane -conditions D7F585DB-0DEC-A94E-DAB0-94D558D82764 -title {Welcome Screen} -component Welcome -command insert -active Yes -parent StandardInstall
|
InstallComponent 3EA07B17-04D8-6508-B535-96CC7173B49A -setup Install -type pane -conditions D7F585DB-0DEC-A94E-DAB0-94D558D82764 -title {Welcome Screen} -component Welcome -command insert -active Yes -parent StandardInstall
|
||||||
Condition D7F585DB-0DEC-A94E-DAB0-94D558D82764 -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id D7F585DB-0DEC-A94E-DAB0-94D558D82764
|
Condition D7F585DB-0DEC-A94E-DAB0-94D558D82764 -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id D7F585DB-0DEC-A94E-DAB0-94D558D82764
|
||||||
InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command reorder -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
|
InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
|
||||||
Condition ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -active Yes -parent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E
|
Condition ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -active Yes -parent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E
|
||||||
InstallComponent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -setup Install -type action -conditions 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -title {Set Virtual Text} -component SetVirtualText -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
|
InstallComponent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -setup Install -type action -conditions 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -title {Set Virtual Text} -component SetVirtualText -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A
|
||||||
Condition 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -active Yes -parent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -title {String Is Condition} -component StringIsCondition -TreeObject::id 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB
|
Condition 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -active Yes -parent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -title {String Is Condition} -component StringIsCondition -TreeObject::id 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB
|
||||||
|
@ -12,7 +12,7 @@ LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
|||||||
PDFTOHTML = 'C:\\pdftohtml\\pdftohtml.exe'
|
PDFTOHTML = 'C:\\pdftohtml\\pdftohtml.exe'
|
||||||
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
|
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
|
||||||
FONTCONFIG_DIR = 'C:\\fontconfig'
|
FONTCONFIG_DIR = 'C:\\fontconfig'
|
||||||
VC90 = r'C:\Program Files\Microsoft Visual Studio 9.0\VC\redist\x86\Microsoft.VC90.CRT'
|
VC90 = r'C:\VC90.CRT'
|
||||||
|
|
||||||
import sys, os, py2exe, shutil, zipfile, glob, subprocess, re
|
import sys, os, py2exe, shutil, zipfile, glob, subprocess, re
|
||||||
from distutils.core import setup
|
from distutils.core import setup
|
||||||
|
@ -21,6 +21,8 @@ import mechanize
|
|||||||
|
|
||||||
mimetypes.add_type('application/epub+zip', '.epub')
|
mimetypes.add_type('application/epub+zip', '.epub')
|
||||||
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
|
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
|
||||||
|
mimetypes.add_type('application/xhtml+xml', '.xhtml')
|
||||||
|
mimetypes.add_type('image/svg+xml', '.svg')
|
||||||
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
|
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
|
||||||
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
|
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
|
||||||
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
|
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
|
||||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
__appname__ = 'calibre'
|
__appname__ = 'calibre'
|
||||||
__version__ = '0.4.133'
|
__version__ = '0.4.134'
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
'''
|
'''
|
||||||
Various run time constants.
|
Various run time constants.
|
||||||
|
@ -467,7 +467,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
if self.htmlfile.is_binary:
|
if self.htmlfile.is_binary:
|
||||||
raise ValueError('Not a valid HTML file: '+self.htmlfile.path)
|
raise ValueError('Not a valid HTML file: '+self.htmlfile.path)
|
||||||
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
|
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
|
||||||
src = src.replace('\x00', '')
|
src = src.replace('\x00', '').replace('\r', ' ')
|
||||||
src = self.preprocess(src)
|
src = self.preprocess(src)
|
||||||
# lxml chokes on unicode input when it contains encoding declarations
|
# lxml chokes on unicode input when it contains encoding declarations
|
||||||
for pat in ENCODING_PATS:
|
for pat in ENCODING_PATS:
|
||||||
|
@ -17,6 +17,7 @@ import types
|
|||||||
import re
|
import re
|
||||||
import copy
|
import copy
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
|
from xml.dom import SyntaxErr as CSSSyntaxError
|
||||||
import cssutils
|
import cssutils
|
||||||
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
||||||
CSSValueList, cssproperties
|
CSSValueList, cssproperties
|
||||||
@ -291,10 +292,14 @@ class Style(object):
|
|||||||
|
|
||||||
def _apply_style_attr(self):
|
def _apply_style_attr(self):
|
||||||
attrib = self._element.attrib
|
attrib = self._element.attrib
|
||||||
if 'style' in attrib:
|
if 'style' not in attrib:
|
||||||
|
return
|
||||||
css = attrib['style'].split(';')
|
css = attrib['style'].split(';')
|
||||||
css = filter(None, map(lambda x: x.strip(), css))
|
css = filter(None, (x.strip() for x in css))
|
||||||
|
try:
|
||||||
style = CSSStyleDeclaration('; '.join(css))
|
style = CSSStyleDeclaration('; '.join(css))
|
||||||
|
except CSSSyntaxError:
|
||||||
|
return
|
||||||
self._style.update(self._stylizer.flatten_style(style))
|
self._style.update(self._stylizer.flatten_style(style))
|
||||||
|
|
||||||
def _has_parent(self):
|
def _has_parent(self):
|
||||||
|
BIN
src/calibre/gui2/images/news/starbulletin.png
Normal file
BIN
src/calibre/gui2/images/news/starbulletin.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 983 B |
@ -369,13 +369,14 @@ class Main(MainWindow, Ui_MainWindow):
|
|||||||
if r == QSystemTrayIcon.Trigger:
|
if r == QSystemTrayIcon.Trigger:
|
||||||
if self.isVisible():
|
if self.isVisible():
|
||||||
for window in QApplication.topLevelWidgets():
|
for window in QApplication.topLevelWidgets():
|
||||||
if isinstance(window, (MainWindow, QDialog)):
|
if isinstance(window, (MainWindow, QDialog)) and window.isVisible():
|
||||||
window.hide()
|
window.hide()
|
||||||
|
setattr(window, '__systray_minimized', True)
|
||||||
else:
|
else:
|
||||||
for window in QApplication.topLevelWidgets():
|
for window in QApplication.topLevelWidgets():
|
||||||
if isinstance(window, (MainWindow, QDialog)):
|
if getattr(window, '__systray_minimized', False):
|
||||||
if window not in (self.device_error_dialog, self.jobs_dialog):
|
|
||||||
window.show()
|
window.show()
|
||||||
|
setattr(window, '__systray_minimized', False)
|
||||||
|
|
||||||
|
|
||||||
def do_default_sync(self, checked):
|
def do_default_sync(self, checked):
|
||||||
|
@ -27,6 +27,7 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
|
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
|
||||||
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
|
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
|
||||||
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
|
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
|
||||||
|
'honoluluadvertiser', 'starbulletin', 'exiled',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
|
@ -1,32 +1,39 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
ambito.com
|
ambito.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Ambito(BasicNewsRecipe):
|
class Ambito(BasicNewsRecipe):
|
||||||
title = 'Ambito.com'
|
title = 'Ambito.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
|
publisher = 'Ambito.com'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
encoding = 'iso-8859-1'
|
||||||
encoding = 'iso--8859-1'
|
|
||||||
language = _('Spanish')
|
|
||||||
cover_url = 'http://www.ambito.com/img/logo_.jpg'
|
cover_url = 'http://www.ambito.com/img/logo_.jpg'
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Argentina'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['object','link'])]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
||||||
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
||||||
@ -43,3 +50,12 @@ class Ambito(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
|
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -7,25 +7,33 @@ b92.net
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class B92(BasicNewsRecipe):
|
class B92(BasicNewsRecipe):
|
||||||
title = u'B92'
|
title = 'B92'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
language = _('Serbian')
|
|
||||||
description = 'Dnevne vesti iz Srbije i sveta'
|
description = 'Dnevne vesti iz Srbije i sveta'
|
||||||
oldest_article = 7
|
oldest_article = 2
|
||||||
|
publisher = 'B92.net'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://static.b92.net/images/fp/logo.gif'
|
cover_url = 'http://static.b92.net/images/fp/logo.gif'
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
|
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Serbia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'B92'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
||||||
@ -44,3 +52,16 @@ class B92(BasicNewsRecipe):
|
|||||||
if biz:
|
if biz:
|
||||||
nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
|
nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
|
||||||
return nurl
|
return nurl
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
soup.html['xml:lang'] = 'sr-Latn'
|
||||||
|
soup.html['lang'] = 'sr-Latn'
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll(name='img',align=True):
|
||||||
|
del item['align']
|
||||||
|
item.insert(0,'<br /><br />')
|
||||||
|
return soup
|
||||||
|
language = _('Serbian')
|
@ -5,31 +5,49 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
blic.rs
|
blic.rs
|
||||||
'''
|
'''
|
||||||
import string,re
|
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Blic(BasicNewsRecipe):
|
class Blic(BasicNewsRecipe):
|
||||||
title = u'Blic'
|
title = u'Blic'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
||||||
oldest_article = 7
|
publisher = 'RINGIER d.o.o.'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Serbia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'Blic'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
|
||||||
|
|
||||||
feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
|
feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['object','link'])]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
start_url, question, rest_url = url.partition('?')
|
start_url, question, rest_url = url.partition('?')
|
||||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Serbian')
|
@ -1,32 +1,36 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
clarin.com
|
clarin.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Clarin(BasicNewsRecipe):
|
class Clarin(BasicNewsRecipe):
|
||||||
title = 'Clarin'
|
title = 'Clarin'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina y mundo'
|
description = 'Noticias de Argentina y mundo'
|
||||||
|
publisher = 'Grupo Clarin'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = _('Spanish')
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Argentina'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'Grupo Clarin'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='a' , attrs={'class':'Imp' })
|
dict(name='a' , attrs={'class':'Imp' })
|
||||||
,dict(name='div' , attrs={'class':'Perma' })
|
,dict(name='div' , attrs={'class':'Perma' })
|
||||||
@ -49,3 +53,12 @@ class Clarin(BasicNewsRecipe):
|
|||||||
rest = artl.partition('-0')[-1]
|
rest = artl.partition('-0')[-1]
|
||||||
lmain = rest.partition('.')[0]
|
lmain = rest.partition('.')[0]
|
||||||
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -5,37 +5,47 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
danas.rs
|
danas.rs
|
||||||
'''
|
'''
|
||||||
import string,re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
|
class Danas(BasicNewsRecipe):
|
||||||
title = 'Danas'
|
title = u'Danas'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
|
description = 'Vesti'
|
||||||
|
publisher = 'Danas d.o.o.'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = False
|
||||||
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://www.danas.rs/images/basic/danas.gif'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Serbia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'Danas'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':'width_1_4' })
|
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
||||||
,dict(name='div', attrs={'class':'metaClanka' })
|
|
||||||
,dict(name='div', attrs={'id':'comments'})
|
,dict(name='div', attrs={'id':'comments'})
|
||||||
,dict(name='div', attrs={'class':'baner' })
|
,dict(name=['object','link'])
|
||||||
,dict(name='div', attrs={'class':'slikaClanka'})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def preprocess_html(self, soup):
|
||||||
return url + '&action=print'
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
language = _('Serbian')
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class DeStandaard(BasicNewsRecipe):
|
class DeStandaard(BasicNewsRecipe):
|
||||||
title = u'De Standaard'
|
title = u'De Standaard'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
language = _('French')
|
language = _('Dutch')
|
||||||
description = u'News from Belgium'
|
description = u'News from Belgium'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
@ -13,6 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
|
|||||||
__author__ = u'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
description = u'News from Belgium'
|
description = u'News from Belgium'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
language = _('Dutch')
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
@ -5,32 +5,37 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
emol.com
|
emol.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElMercurio(BasicNewsRecipe):
|
class ElMercurio(BasicNewsRecipe):
|
||||||
title = 'El Mercurio online'
|
title = 'El Mercurio online'
|
||||||
language = _('Spanish')
|
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'El sitio de noticias online de Chile'
|
||||||
|
publisher = 'El Mercurio'
|
||||||
|
category = 'news, politics, Chile'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Chile'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'despliegue-txt_750px'})
|
dict(name='div', attrs={'class':'despliegue-txt_750px'})
|
||||||
,dict(name='div', attrs={'id':'div_cuerpo_participa'})
|
,dict(name='div', attrs={'id':'div_cuerpo_participa'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
|
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
|
||||||
,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
|
,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
|
||||||
@ -46,3 +51,11 @@ class ElMercurio(BasicNewsRecipe):
|
|||||||
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
|
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elargentino.com
|
elargentino.com
|
||||||
'''
|
'''
|
||||||
@ -12,9 +12,11 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
title = 'ElArgentino.com'
|
title = 'ElArgentino.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
language = _('Spanish')
|
publisher = 'ElArgentino.com'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
@ -22,10 +24,12 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Argentina'
|
, '--category', category
|
||||||
, '--publisher' , 'ElArgentino.com'
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'noprint' })
|
dict(name='div', attrs={'id':'noprint' })
|
||||||
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
||||||
@ -50,7 +54,10 @@ class ElArgentino(BasicNewsRecipe):
|
|||||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
soup.prettify()
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -12,35 +12,49 @@ class ElMundo(BasicNewsRecipe):
|
|||||||
title = 'El Mundo'
|
title = 'El Mundo'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Spain'
|
description = 'News from Spain'
|
||||||
language = _('Spanish')
|
publisher = 'El Mundo'
|
||||||
|
category = 'news, politics, Spain'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'iso8859_15'
|
encoding = 'iso8859_15'
|
||||||
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
|
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Spain'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
|
||||||
|
,dict(name='div', attrs={'class':['contenido_noticia_01']})
|
||||||
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['herramientas','publicidad_google','video','herramientasarriba','contenido_noticia_02']})
|
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
|
||||||
,dict(name='div', attrs={'id':'modulo_multimedia' })
|
,dict(name='div', attrs={'id':'modulo_multimedia' })
|
||||||
,dict(name=['object','script','link', 'a'])
|
|
||||||
,dict(name='ul', attrs={'class':'herramientas' })
|
,dict(name='ul', attrs={'class':'herramientas' })
|
||||||
|
,dict(name=['object','link'])
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
|
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
|
||||||
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
|
|
||||||
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
|
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
|
||||||
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
|
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
|
||||||
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
|
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
|
||||||
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
|
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
|
||||||
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
|
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
|
||||||
|
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -8,25 +8,28 @@ estadao.com.br
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Estadao(BasicNewsRecipe):
|
class Estadao(BasicNewsRecipe):
|
||||||
title = 'O Estado de S. Paulo'
|
title = 'O Estado de S. Paulo'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Brasil'
|
description = 'News from Brasil in Portugese'
|
||||||
language = _('Spanish')
|
publisher = 'O Estado de S. Paulo'
|
||||||
|
category = 'news, politics, Brasil'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
cover_url = 'http://www.estadao.com.br/img/logo_estadao.png'
|
cover_url = 'http://www.estadao.com.br/img/logo_estadao.png'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Brasil'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -52,4 +55,8 @@ class Estadao(BasicNewsRecipe):
|
|||||||
ifr = soup.find('iframe')
|
ifr = soup.find('iframe')
|
||||||
if ifr:
|
if ifr:
|
||||||
ifr.extract()
|
ifr.extract()
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
language = _('Portugese')
|
51
src/calibre/web/feeds/recipes/recipe_exiled.py
Normal file
51
src/calibre/web/feeds/recipes/recipe_exiled.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
exiledonline.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Exiled(BasicNewsRecipe):
|
||||||
|
title = 'Exiled Online'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
|
||||||
|
publisher = 'Exiled Online'
|
||||||
|
language = _('English')
|
||||||
|
category = 'news, politics, international'
|
||||||
|
oldest_article = 15
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'utf8'
|
||||||
|
remove_javascript = True
|
||||||
|
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
|
||||||
|
|
||||||
|
html2lrf_options = [
|
||||||
|
'--comment' , description
|
||||||
|
, '--category' , category
|
||||||
|
, '--publisher' , publisher
|
||||||
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['object','link'])
|
||||||
|
,dict(name='div', attrs={'class':'info'})
|
||||||
|
,dict(name='div', attrs={'id':['comments','navig']})
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [(u'Articles', u'http://exiledonline.com/feed/' )]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
return soup
|
||||||
|
|
@ -7,37 +7,46 @@ granma.cubaweb.cu
|
|||||||
'''
|
'''
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Granma(BasicNewsRecipe):
|
class Granma(BasicNewsRecipe):
|
||||||
title = 'Diario Granma'
|
title = 'Diario Granma'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
language = _('Spanish')
|
|
||||||
description = 'Organo oficial del Comite Central del Partido Comunista de Cuba'
|
description = 'Organo oficial del Comite Central del Partido Comunista de Cuba'
|
||||||
|
publisher = 'Granma'
|
||||||
|
category = 'news, politics, Cuba'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Cuba'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
|
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
|
||||||
|
|
||||||
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
|
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
del soup.body.table['style']
|
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
|
||||||
rtag = soup.find('td', attrs={'height':'458'})
|
soup.head.insert(0,mtag)
|
||||||
if rtag:
|
for item in soup.findAll('table'):
|
||||||
del rtag['style']
|
if item.has_key('width'):
|
||||||
|
del item['width']
|
||||||
|
if item.has_key('height'):
|
||||||
|
del item['height']
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
harpers.org - paid subscription/ printed issue articles
|
harpers.org - paid subscription/ printed issue articles
|
||||||
This recipe only get's article's published in text format
|
This recipe only get's article's published in text format
|
||||||
@ -9,13 +9,15 @@ images and pdf's are ignored
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Harpers_full(BasicNewsRecipe):
|
class Harpers_full(BasicNewsRecipe):
|
||||||
title = u"Harper's Magazine - articles from printed edition"
|
title = u"Harper's Magazine - articles from printed edition"
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
description = u"Harper's Magazine: Founded June 1850."
|
description = u"Harper's Magazine: Founded June 1850."
|
||||||
language = _('English')
|
publisher = "Harpers's"
|
||||||
|
category = 'news, politics, USA'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -26,6 +28,15 @@ class Harpers_full(BasicNewsRecipe):
|
|||||||
INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
|
INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
|
||||||
LOGIN = 'http://www.harpers.org'
|
LOGIN = 'http://www.harpers.org'
|
||||||
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
|
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
html2lrf_options = [
|
||||||
|
'--comment', description
|
||||||
|
, '--category', category
|
||||||
|
, '--publisher', publisher
|
||||||
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
|
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -60,3 +71,10 @@ class Harpers_full(BasicNewsRecipe):
|
|||||||
,'description':''
|
,'description':''
|
||||||
})
|
})
|
||||||
return [(soup.head.title.string, articles)]
|
return [(soup.head.title.string, articles)]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('English')
|
58
src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py
Normal file
58
src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
honoluluadvertiser.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Honoluluadvertiser(BasicNewsRecipe):
|
||||||
|
title = 'Honolulu Advertiser'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
|
||||||
|
publisher = 'Honolulu Advertiser'
|
||||||
|
category = 'news, Honolulu, Hawaii'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'cp1252'
|
||||||
|
remove_javascript = True
|
||||||
|
cover_url = 'http://www.honoluluadvertiser.com/graphics/branding.gif'
|
||||||
|
|
||||||
|
html2lrf_options = [
|
||||||
|
'--comment' , description
|
||||||
|
, '--category' , category
|
||||||
|
, '--publisher' , publisher
|
||||||
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='td')]
|
||||||
|
|
||||||
|
remove_tags = [dict(name=['object','link'])]
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' )
|
||||||
|
,(u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML' )
|
||||||
|
,(u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML' )
|
||||||
|
,(u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML' )
|
||||||
|
,(u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML' )
|
||||||
|
,(u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML' )
|
||||||
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
ubody, sep, rest = url.rpartition('/-1/')
|
||||||
|
root, sep2, article_id = ubody.partition('/article/')
|
||||||
|
return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'
|
||||||
|
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
infobae.com
|
infobae.com
|
||||||
'''
|
'''
|
||||||
@ -12,21 +12,23 @@ class Infobae(BasicNewsRecipe):
|
|||||||
title = 'Infobae.com'
|
title = 'Infobae.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
|
publisher = 'Infobae.com'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'iso-8859-1'
|
||||||
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Argentina'
|
, '--category', category
|
||||||
, '--publisher' , 'Infobae.com'
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
||||||
@ -39,3 +41,12 @@ class Infobae(BasicNewsRecipe):
|
|||||||
main, sep, article_part = url.partition('contenidos/')
|
main, sep, article_part = url.partition('contenidos/')
|
||||||
article_id, rsep, rrest = article_part.partition('-')
|
article_id, rsep, rrest = article_part.partition('-')
|
||||||
return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
|
return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -12,20 +12,24 @@ class JBOnline(BasicNewsRecipe):
|
|||||||
title = 'Jornal Brasileiro Online'
|
title = 'Jornal Brasileiro Online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Brasil'
|
description = 'News from Brasil'
|
||||||
|
publisher = 'Jornal Brasileiro'
|
||||||
|
category = 'news, politics, Brasil'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif'
|
cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Brasil'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})]
|
||||||
|
|
||||||
remove_tags = [dict(name=['script','object','form'])]
|
remove_tags = [dict(name=['script','object','form'])]
|
||||||
@ -36,7 +40,8 @@ class JBOnline(BasicNewsRecipe):
|
|||||||
ifr = soup.find('iframe')
|
ifr = soup.find('iframe')
|
||||||
if ifr:
|
if ifr:
|
||||||
ifr.extract()
|
ifr.extract()
|
||||||
item = soup.find('div', attrs={'id':'corpoNoticia'})
|
for item in soup.findAll(style=True):
|
||||||
if item:
|
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
language = _('Portugese')
|
@ -6,28 +6,35 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
jutarnji.hr
|
jutarnji.hr
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import string, re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
|
|
||||||
class Jutarnji(BasicNewsRecipe):
|
class Jutarnji(BasicNewsRecipe):
|
||||||
title = 'Jutarnji'
|
title = u'Jutarnji'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
description = 'Online izdanje Jutarnjeg lista'
|
description = u'Hrvatski portal'
|
||||||
|
publisher = 'Jutarnji.hr'
|
||||||
|
category = 'news, politics, Croatia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
delay = 1
|
delay = 1
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
remove_javascript = True
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Croatia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'Europapress holding d.o.o.'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -49,11 +56,16 @@ class Jutarnji(BasicNewsRecipe):
|
|||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, split, rest = url.partition('.jl')
|
main, split, rest = url.partition('.jl')
|
||||||
rmain, rsplit, rrest = main.rpartition(',')
|
rmain, rsplit, rrest = main.rpartition(',')
|
||||||
return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
soup.prettify()
|
mtag = '<meta http-equiv="Content-Language" content="hr"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll(width=True):
|
||||||
|
del item['width']
|
||||||
return soup
|
return soup
|
||||||
|
|
@ -13,21 +13,25 @@ class Juventudrebelde(BasicNewsRecipe):
|
|||||||
title = 'Juventud Rebelde'
|
title = 'Juventud Rebelde'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Diario de la Juventud Cubana'
|
description = 'Diario de la Juventud Cubana'
|
||||||
|
publisher = 'Juventud rebelde'
|
||||||
|
category = 'news, politics, Cuba'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Cuba'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -40,4 +44,11 @@ class Juventudrebelde(BasicNewsRecipe):
|
|||||||
,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' )
|
,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -5,7 +5,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
juventudrebelde.co.cu
|
juventudrebelde.co.cu
|
||||||
'''
|
'''
|
||||||
from calibre import strftime
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -13,22 +12,33 @@ class Juventudrebelde_english(BasicNewsRecipe):
|
|||||||
title = 'Juventud Rebelde in english'
|
title = 'Juventud Rebelde in english'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'The newspaper of Cuban Youth'
|
description = 'The newspaper of Cuban Youth'
|
||||||
language = _('English')
|
publisher = 'Juventud Rebelde'
|
||||||
|
category = 'news, politics, Cuba'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'iso-8859-1'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Cuba'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'read'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'read'})]
|
||||||
|
|
||||||
feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )]
|
feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('English')
|
@ -11,25 +11,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class LaCuarta(BasicNewsRecipe):
|
class LaCuarta(BasicNewsRecipe):
|
||||||
title = 'La Cuarta'
|
title = 'La Cuarta'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'La Cuarta Cibernetica: El Diario popular'
|
||||||
|
publisher = 'CODISA, Consorcio Digital S.A.'
|
||||||
|
category = 'news, politics, entertainment, Chile'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Chile'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ]
|
keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='script')
|
dict(name='ul')
|
||||||
,dict(name='ul')
|
|
||||||
,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']})
|
,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']})
|
||||||
,dict(name='div', attrs={'class':['par ad-1','par ad-2']})
|
,dict(name='div', attrs={'class':['par ad-1','par ad-2']})
|
||||||
,dict(name='input')
|
,dict(name='input')
|
||||||
@ -37,7 +40,14 @@ class LaCuarta(BasicNewsRecipe):
|
|||||||
,dict(name='strong', text='PUBLICIDAD')
|
,dict(name='strong', text='PUBLICIDAD')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
|
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
|
||||||
|
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -12,21 +12,24 @@ class LaSegunda(BasicNewsRecipe):
|
|||||||
title = 'La Segunda'
|
title = 'La Segunda'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'El sitio de noticias online de Chile'
|
||||||
language = _('Spanish')
|
publisher = 'La Segunda'
|
||||||
|
category = 'news, politics, Chile'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Chile'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='table')]
|
keep_only_tags = [dict(name='table')]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -46,3 +49,13 @@ class LaSegunda(BasicNewsRecipe):
|
|||||||
rest, sep, article_id = url.partition('index.asp?idnoticia=')
|
rest, sep, article_id = url.partition('index.asp?idnoticia=')
|
||||||
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
|
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(name='table', width=True):
|
||||||
|
del item['width']
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -12,20 +12,24 @@ class LaTercera(BasicNewsRecipe):
|
|||||||
title = 'La Tercera'
|
title = 'La Tercera'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'El sitio de noticias online de Chile'
|
||||||
|
publisher = 'La Tercera'
|
||||||
|
category = 'news, politics, Chile'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Chile'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'span-16 articulo border'}) ]
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='script')
|
dict(name='script')
|
||||||
@ -50,4 +54,11 @@ class LaTercera(BasicNewsRecipe):
|
|||||||
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
|
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
lanacion.com.ar
|
lanacion.com.ar
|
||||||
'''
|
'''
|
||||||
@ -11,20 +11,23 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Lanacion(BasicNewsRecipe):
|
class Lanacion(BasicNewsRecipe):
|
||||||
title = 'La Nacion'
|
title = 'La Nacion'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!'
|
description = 'Noticias de Argentina y el resto del mundo'
|
||||||
|
publisher = 'La Nacion'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
remove_javascript = True
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Argentina'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'La Nacion SA'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
||||||
@ -47,11 +50,11 @@ class Lanacion(BasicNewsRecipe):
|
|||||||
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
|
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def preprocess_html(self, soup):
|
||||||
index = 'http://www.lanacion.com.ar'
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
cover_url = None
|
soup.head.insert(0,mtag)
|
||||||
soup = self.index_to_soup(index)
|
for item in soup.findAll(style=True):
|
||||||
cover_item = soup.find('img',attrs={'class':'logo'})
|
del item['style']
|
||||||
if cover_item:
|
return soup
|
||||||
cover_url = index + cover_item['src']
|
|
||||||
return cover_url
|
language = _('Spanish')
|
@ -13,20 +13,24 @@ class LaNacionChile(BasicNewsRecipe):
|
|||||||
title = 'La Nacion Chile'
|
title = 'La Nacion Chile'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'El sitio de noticias online de Chile'
|
||||||
|
publisher = 'La Nacion'
|
||||||
|
category = 'news, politics, Chile'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif'
|
cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Chile'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'bloque'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'bloque'})]
|
||||||
|
|
||||||
feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
|
feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
|
||||||
@ -41,5 +45,10 @@ class LaNacionChile(BasicNewsRecipe):
|
|||||||
item = soup.find('a', attrs={'href':'javascript:window.close()'})
|
item = soup.find('a', attrs={'href':'javascript:window.close()'})
|
||||||
if item:
|
if item:
|
||||||
item.extract()
|
item.extract()
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
laprensa.com.ar
|
laprensa.com.ar
|
||||||
'''
|
'''
|
||||||
@ -13,20 +13,24 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
title = 'La Prensa'
|
title = 'La Prensa'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
|
publisher = 'La Prensa'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
language = _('Spanish')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
|
cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Argentina'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
|
(u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
|
||||||
,(u'Economia' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' )
|
,(u'Economia' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' )
|
||||||
@ -47,5 +51,10 @@ class LaPrensa(BasicNewsRecipe):
|
|||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
del soup.body['onload']
|
del soup.body['onload']
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -7,12 +7,15 @@ nin.co.yu
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re, urllib
|
import re, urllib
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Nin(BasicNewsRecipe):
|
class Nin(BasicNewsRecipe):
|
||||||
title = 'NIN online'
|
title = 'NIN online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Nedeljne informativne novine'
|
description = 'Nedeljne informativne novine'
|
||||||
|
publisher = 'NIN'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
@ -22,12 +25,18 @@ class Nin(BasicNewsRecipe):
|
|||||||
PREFIX = 'http://www.nin.co.yu'
|
PREFIX = 'http://www.nin.co.yu'
|
||||||
INDEX = PREFIX + '/?change_lang=ls'
|
INDEX = PREFIX + '/?change_lang=ls'
|
||||||
LOGIN = PREFIX + '/?logout=true'
|
LOGIN = PREFIX + '/?logout=true'
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, politics, Serbia'
|
, '--category', category
|
||||||
, '--publisher' , 'NIN'
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
@ -53,3 +62,12 @@ class Nin(BasicNewsRecipe):
|
|||||||
if link_item:
|
if link_item:
|
||||||
cover_url = self.PREFIX + link_item['src']
|
cover_url = self.PREFIX + link_item['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Serbian')
|
@ -5,31 +5,45 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
novosti.rs
|
novosti.rs
|
||||||
'''
|
'''
|
||||||
import string,re
|
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Novosti(BasicNewsRecipe):
|
class Novosti(BasicNewsRecipe):
|
||||||
title = 'Vecernje Novosti'
|
title = u'Vecernje Novosti'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = u'Darko Miletic'
|
||||||
description = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti'
|
description = u'Vesti'
|
||||||
|
publisher = 'Kompanija Novosti'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
encoding = 'utf8'
|
||||||
|
remove_javascript = True
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Serbia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'Novosti AD'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'jednaVest'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'jednaVest'})]
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'info_bottom'})
|
remove_tags = [dict(name='div', attrs={'class':['info','info_bottom','clip_div']})]
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class':'info'})
|
|
||||||
,dict(name='div', attrs={'class':'info_bottom'})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Serbian')
|
@ -6,28 +6,36 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
nspm.rs
|
nspm.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import string,re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Nspm(BasicNewsRecipe):
|
class Nspm(BasicNewsRecipe):
|
||||||
title = u'Nova srpska politicka misao'
|
title = u'Nova srpska politicka misao'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
||||||
|
publisher = 'NSPM'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
language = _('Serbian')
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
||||||
cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
|
encoding = 'utf8'
|
||||||
|
remove_javascript = True
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, politics, Serbia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'IIC NSPM'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
remove_tags = [dict(name='a')]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
@ -38,3 +46,15 @@ class Nspm(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('.html','/stampa.html')
|
return url.replace('.html','/stampa.html')
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
soup.html['xml:lang'] = 'sr-Latn-RS'
|
||||||
|
soup.html['lang'] = 'sr-Latn-RS'
|
||||||
|
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
|
||||||
|
if ftag:
|
||||||
|
ftag['content'] = 'sr-Latn-RS'
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Serbian')
|
@ -12,20 +12,24 @@ class OGlobo(BasicNewsRecipe):
|
|||||||
title = 'O Globo'
|
title = 'O Globo'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Brasil'
|
description = 'News from Brasil'
|
||||||
|
publisher = 'O Globo'
|
||||||
|
category = 'news, politics, Brasil'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = _('Spanish')
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://oglobo.globo.com/_img/o-globo.png'
|
cover_url = 'http://oglobo.globo.com/_img/o-globo.png'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Brasil'
|
, '--category', category
|
||||||
, '--publisher' , title
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -56,3 +60,10 @@ class OGlobo(BasicNewsRecipe):
|
|||||||
,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
|
,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
|
||||||
,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
|
,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Portugese')
|
@ -1,32 +1,37 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
pagina12.com.ar
|
pagina12.com.ar
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Pagina12(BasicNewsRecipe):
|
class Pagina12(BasicNewsRecipe):
|
||||||
title = u'Pagina/12'
|
title = u'Pagina/12'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina y el resto del mundo'
|
description = 'Noticias de Argentina y el resto del mundo'
|
||||||
language = _('Spanish')
|
publisher = 'La Pagina S.A.'
|
||||||
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
|
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Argentina'
|
, '--category', category
|
||||||
, '--publisher' , 'La Pagina S.A.'
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'volver'})
|
dict(name='div', attrs={'id':'volver'})
|
||||||
@ -38,3 +43,12 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Spanish')
|
@ -6,30 +6,53 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
pescanik.net
|
pescanik.net
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import string,re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Pescanik(BasicNewsRecipe):
|
class Pescanik(BasicNewsRecipe):
|
||||||
title = 'Pescanik'
|
title = 'Pescanik'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Pescanik'
|
description = 'Pescanik'
|
||||||
|
publisher = 'Pescanik'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
html2lrf_options = ['--base-font-size', '10']
|
remove_javascript = True
|
||||||
html2epub_options = 'base_font_size = "10pt"'
|
encoding = 'utf8'
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
|
html2lrf_options = [
|
||||||
|
'--comment', description
|
||||||
|
, '--category', category
|
||||||
|
, '--publisher', publisher
|
||||||
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'article_seperator'})
|
remove_tags = [
|
||||||
|
dict(name='td' , attrs={'class':'buttonheading'})
|
||||||
remove_tags = [dict(name='td' , attrs={'class':'buttonheading'})]
|
,dict(name='span', attrs={'class':'article_seperator'})
|
||||||
|
,dict(name=['object','link'])
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
nurl = url.replace('http://pescanik.net/index.php','http://pescanik.net/index2.php')
|
nurl = url.replace('/index.php','/index2.php')
|
||||||
return nurl + '&pop=1&page=0'
|
return nurl + '&pop=1&page=0'
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
language = _('Serbian')
|
@ -5,37 +5,61 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
politika.rs
|
politika.rs
|
||||||
'''
|
'''
|
||||||
import string,re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Politika(BasicNewsRecipe):
|
class Politika(BasicNewsRecipe):
|
||||||
title = 'Politika Online'
|
title = u'Politika Online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Najstariji dnevni list na Balkanu'
|
description = 'Najstariji dnevni list na Balkanu'
|
||||||
|
publisher = 'Politika novine i Magazini d.o.o'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = '.content_center_border {text-align: left;}'
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://www.politika.rs:8080/images/politika.gif'
|
remove_javascript = True
|
||||||
|
encoding = 'utf8'
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, Serbia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'POLITIKA NOVINE I MAGAZINI d.o.o.'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'class':'contentcenter'}) ]
|
keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'datum_item_details'})
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'class':['send_print','txt-komentar']})
|
||||||
|
,dict(name=['object','link','a'])
|
||||||
|
,dict(name='h1', attrs={'class':'box_header-tags'})
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
|
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
|
||||||
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
|
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
|
||||||
|
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
|
||||||
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
|
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
|
||||||
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
|
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
|
||||||
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
|
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
|
||||||
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
|
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
|
||||||
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
|
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
||||||
|
if ftag:
|
||||||
|
ftag['align'] = 'left'
|
||||||
|
return soup
|
||||||
|
@ -17,7 +17,7 @@ class PetersburgTimes(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = _('Russian')
|
language = _('English')
|
||||||
INDEX = 'http://www.sptimes.ru'
|
INDEX = 'http://www.sptimes.ru'
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
58
src/calibre/web/feeds/recipes/recipe_starbulletin.py
Normal file
58
src/calibre/web/feeds/recipes/recipe_starbulletin.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
starbulletin.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Starbulletin(BasicNewsRecipe):
|
||||||
|
title = 'Honolulu Star-Bulletin'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = "Latest national and local Hawaii sports news"
|
||||||
|
publisher = 'Honolulu Star-Bulletin'
|
||||||
|
category = 'news, Honolulu, Hawaii'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'utf8'
|
||||||
|
remove_javascript = True
|
||||||
|
cover_url = 'http://media.starbulletin.com/designimages/spacer.gif'
|
||||||
|
|
||||||
|
html2lrf_options = [
|
||||||
|
'--comment' , description
|
||||||
|
, '--category' , category
|
||||||
|
, '--publisher' , publisher
|
||||||
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
keep_only_tags = [ dict(name='div', attrs={'id':'storyColoumn'}) ]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['object','link'])
|
||||||
|
,dict(name='span', attrs={'id':'printdesc'})
|
||||||
|
,dict(name='div' , attrs={'class':'lightGreyBox storyTools clearAll'})
|
||||||
|
,dict(name='div' , attrs={'id':'breadcrumbs'})
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'Headlines', u'http://www.starbulletin.com/starbulletin_headlines.rss' )
|
||||||
|
,(u'News', u'http://www.starbulletin.com/news/index.rss' )
|
||||||
|
,(u'Sports', u'http://www.starbulletin.com/sports/index.rss' )
|
||||||
|
,(u'Features', u'http://www.starbulletin.com/features/index.rss' )
|
||||||
|
,(u'Editorials', u'http://www.starbulletin.com/editorials/index.rss' )
|
||||||
|
,(u'Business', u'http://www.starbulletin.com/business/index.rss' )
|
||||||
|
,(u'Travel', u'http://www.starbulletin.com/travel/index.rss' )
|
||||||
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
return soup
|
||||||
|
|
@ -1,13 +1,13 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
vijesti.cg.yu
|
vijesti.cg.yu
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import string,re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -15,23 +15,35 @@ class Vijesti(BasicNewsRecipe):
|
|||||||
title = 'Vijesti'
|
title = 'Vijesti'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Montenegro'
|
description = 'News from Montenegro'
|
||||||
oldest_article = 2
|
publisher = 'Daily Press Vijesti'
|
||||||
|
category = 'news, politics, Montenegro'
|
||||||
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
remove_javascript = True
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
|
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
|
||||||
|
remove_javascript = True
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
use_embedded_content = False
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category' , 'news, Montenegro'
|
, '--category', category
|
||||||
, '--publisher' , 'Daily Press Vijesti'
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'align':'right'})
|
||||||
|
,dict(name=['object','link'])
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
|
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -39,4 +51,10 @@ class Vijesti(BasicNewsRecipe):
|
|||||||
soup.html['lang'] = 'sr-Latn-ME'
|
soup.html['lang'] = 'sr-Latn-ME'
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if item.has_key('align'):
|
||||||
|
del item['align']
|
||||||
|
item.insert(0,'<br /><br />')
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
language = _('Serbian')
|
@ -6,26 +6,34 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
vreme.com
|
vreme.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import string,re
|
import re
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Vreme(BasicNewsRecipe):
|
class Vreme(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Vreme'
|
title = 'Vreme'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Politicki Nedeljnik Srbije'
|
description = 'Politicki Nedeljnik Srbije'
|
||||||
|
publisher = 'Vreme d.o.o.'
|
||||||
|
category = 'news, politics, Serbia'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
INDEX = 'http://www.vreme.com'
|
INDEX = 'http://www.vreme.com'
|
||||||
LOGIN = 'http://www.vreme.com/account/index.php'
|
LOGIN = 'http://www.vreme.com/account/index.php'
|
||||||
|
remove_javascript = True
|
||||||
|
use_embedded_content = False
|
||||||
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
, '--category', 'news, politics, Serbia'
|
, '--publisher', publisher
|
||||||
, '--publisher', 'Vreme d.o.o.'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
@ -67,9 +75,28 @@ class Vreme(BasicNewsRecipe):
|
|||||||
})
|
})
|
||||||
return [(soup.head.title.string, articles)]
|
return [(soup.head.title.string, articles)]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['object','link'])
|
||||||
|
,dict(name='table',attrs={'xclass':'image'})
|
||||||
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '&print=yes'
|
return url + '&print=yes'
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
del soup.body['text' ]
|
||||||
|
del soup.body['bgcolor']
|
||||||
|
del soup.body['onload' ]
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
tbl = soup.body.table
|
||||||
|
tbbb = soup.find('td')
|
||||||
|
if tbbb:
|
||||||
|
tbbb.extract()
|
||||||
|
tbl.extract()
|
||||||
|
soup.body.insert(0,tbbb)
|
||||||
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
@ -77,3 +104,5 @@ class Vreme(BasicNewsRecipe):
|
|||||||
if cover_item:
|
if cover_item:
|
||||||
cover_url = self.INDEX + cover_item['src']
|
cover_url = self.INDEX + cover_item['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
language = _('Serbian')
|
@ -410,6 +410,7 @@ class RecursiveFetcher(object, LoggingInterface):
|
|||||||
_fname.decode('latin1', 'replace')
|
_fname.decode('latin1', 'replace')
|
||||||
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
|
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
|
||||||
_fname = sanitize_file_name(_fname)
|
_fname = sanitize_file_name(_fname)
|
||||||
|
_fname = os.path.splitext(_fname)[0]+'.xhtml'
|
||||||
res = os.path.join(linkdiskpath, _fname)
|
res = os.path.join(linkdiskpath, _fname)
|
||||||
self.downloaded_paths.append(res)
|
self.downloaded_paths.append(res)
|
||||||
self.filemap[nurl] = res
|
self.filemap[nurl] = res
|
||||||
|
@ -77,7 +77,7 @@ def run_windows_install_jammer(installer):
|
|||||||
|
|
||||||
def build_windows(shutdown=True):
|
def build_windows(shutdown=True):
|
||||||
installer = installer_name('exe')
|
installer = installer_name('exe')
|
||||||
vm = '/vmware/Windows XP/Windows XP Professional.vmx'
|
vm = '/mnt/backup/calibre_windows_xp_home/calibre_windows_xp_home.vmx'
|
||||||
start_vm(vm, 'windows', BUILD_SCRIPT%('python setup.py develop', 'python','installer\\\\windows\\\\freeze.py'))
|
start_vm(vm, 'windows', BUILD_SCRIPT%('python setup.py develop', 'python','installer\\\\windows\\\\freeze.py'))
|
||||||
if os.path.exists('build/py2exe'):
|
if os.path.exists('build/py2exe'):
|
||||||
shutil.rmtree('build/py2exe')
|
shutil.rmtree('build/py2exe')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user