diff --git a/installer/windows/calibre/calibre.mpi b/installer/windows/calibre/calibre.mpi index 2b3cca3cae..ee7ab53455 100644 --- a/installer/windows/calibre/calibre.mpi +++ b/installer/windows/calibre/calibre.mpi @@ -299,7 +299,6 @@ File ::2BCD9281-2CBC-CF0D-0E12-2CE11F6ED758 -name comic2epub.exe.local -parent 8 File ::EDE6F457-C83F-C5FA-9AF4-38FDFF17D929 -name PIL._imagingtk.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::09D0906E-3611-3DB7-32CF-A140585694A7 -name win32pdh.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::4C84F0DC-7157-0C90-2062-180139B03E25 -name IM_MOD_RL_rgb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 -File ::F402F507-87C5-BDB1-80AE-AD3FF4A4BCE7 -name bzrlib._patiencediff_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::A732EDE7-4796-241F-BECA-68E59F88F8AF -name lrs2lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::69072379-7D16-B9F7-9F39-3E6403C48267 -name IM_MOD_RL_xbm_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::FBD11D98-D1E7-5DD9-BF02-01CE92518859 -name IM_MOD_RL_otb_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 @@ -365,7 +364,6 @@ File ::26741B21-C241-E100-8BB1-8B679BC3E662 -name configure.xml -parent 8E5D85A4 File ::7D491E89-C6D3-1E6E-F4BD-8E55260FE33E -name libexpat.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::A4910EB3-0F1C-F6F0-CD2D-16A64BBAA92B -name calibre-fontconfig.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::8711327A-716D-B162-6AC6-2FB4AD071266 -name fb22lrf.exe -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 -File ::0FDD3A7A-31F3-8089-CE32-D80EAA6F62B2 -name bzrlib._btree_serializer_c.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::476CB977-5155-D56F-26CA-EB243AEBBA99 -name unrar.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::2DA1CC8D-AF5C-3B03-2060-301DFE0356CC -name mobi2oeb.exe.local -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::2E2A9EDA-5386-444E-8479-557386794552 -name IM_MOD_RL_uil_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 @@ -487,7 +485,6 @@ File ::AA761ACD-B728-2324-AA75-B20A2A79F125 -name lrf2lrs.exe -parent 8E5D85A4-7 File ::95434C76-22F5-B9CE-6194-6E1B1EE3232D -name IM_MOD_RL_info_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::AAF45D03-322F-5553-63A7-312DB754A20B -name _ctypes.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::C3D351CA-A8D8-AB35-55D9-5AACF8DB37D1 -name python26.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 -File ::2F90B52F-A728-2CA4-5688-0283674695B7 -name _elementtree.pyd -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::B50B66A1-FB65-FAD5-1DD7-E894ACC07464 -name QtSvg4.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::906FF13D-D993-7192-7EA5-6D15A5A24BFB -name CORE_RL_png_.dll -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 File ::5D368661-6BF0-D6AF-7C1A-87646864EB4B -name delegates.xml -parent 8E5D85A4-7608-47A1-CF7C-309060D5FF40 @@ -552,7 +549,7 @@ SetupType ::D9ADE41C-B744-690C-2CED-CF826BF03D2E -setup Install -active Yes -pla InstallComponent 3EA07B17-04D8-6508-B535-96CC7173B49A -setup Install -type pane -conditions D7F585DB-0DEC-A94E-DAB0-94D558D82764 -title {Welcome Screen} -component Welcome -command insert -active Yes -parent StandardInstall Condition D7F585DB-0DEC-A94E-DAB0-94D558D82764 -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id D7F585DB-0DEC-A94E-DAB0-94D558D82764 -InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command reorder -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A +InstallComponent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -setup Install -type action -conditions ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -title {Check for Previous Install} -component CheckForPreviousInstall -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A Condition ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E -active Yes -parent 7CCDA4BB-861C-C21E-3011-E93DB58F07D6 -title {Execute Script Condition} -component ExecuteScriptCondition -TreeObject::id ADBCD53E-C9A6-A3CA-1AAC-0DB0CE84F71E InstallComponent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -setup Install -type action -conditions 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -title {Set Virtual Text} -component SetVirtualText -command insert -active Yes -parent 3EA07B17-04D8-6508-B535-96CC7173B49A Condition 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB -active Yes -parent 580ACF2C-517F-5E48-9DEF-7DAEFBA59FDD -title {String Is Condition} -component StringIsCondition -TreeObject::id 6DE3B369-9D6B-6BC1-4EA0-2C54ECE159EB diff --git a/installer/windows/freeze.py b/installer/windows/freeze.py index 064615f422..ab58fb669d 100644 --- a/installer/windows/freeze.py +++ b/installer/windows/freeze.py @@ -12,7 +12,7 @@ LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll' PDFTOHTML = 'C:\\pdftohtml\\pdftohtml.exe' IMAGEMAGICK_DIR = 'C:\\ImageMagick' FONTCONFIG_DIR = 'C:\\fontconfig' -VC90 = r'C:\Program Files\Microsoft Visual Studio 9.0\VC\redist\x86\Microsoft.VC90.CRT' +VC90 = r'C:\VC90.CRT' import sys, os, py2exe, shutil, zipfile, glob, subprocess, re from distutils.core import setup diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index cb3c05c7b9..aaf0c348d9 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -21,6 +21,8 @@ import mechanize mimetypes.add_type('application/epub+zip', '.epub') mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs') +mimetypes.add_type('application/xhtml+xml', '.xhtml') +mimetypes.add_type('image/svg+xml', '.svg') mimetypes.add_type('application/x-sony-bbeb', '.lrf') mimetypes.add_type('application/x-dtbncx+xml', '.ncx') mimetypes.add_type('application/adobe-page-template+xml', '.xpgt') diff --git a/src/calibre/constants.py b/src/calibre/constants.py index ab5cc9f6b0..fac71aa8da 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.4.133' +__version__ = '0.4.134' __author__ = "Kovid Goyal " ''' Various run time constants. diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 5e87351375..c85fe8cc12 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -467,7 +467,7 @@ class Parser(PreProcessor, LoggingInterface): if self.htmlfile.is_binary: raise ValueError('Not a valid HTML file: '+self.htmlfile.path) src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip() - src = src.replace('\x00', '') + src = src.replace('\x00', '').replace('\r', ' ') src = self.preprocess(src) # lxml chokes on unicode input when it contains encoding declarations for pat in ENCODING_PATS: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 2051fd141e..fee96585db 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -48,34 +48,34 @@ XPNSMAP = { DC_PREFIXES = ('d11', 'd10', 'd09') -def XML(name): +def XML(name): return '{%s}%s' % (XML_NS, name) -def XHTML(name): +def XHTML(name): return '{%s}%s' % (XHTML_NS, name) -def OPF(name): +def OPF(name): return '{%s}%s' % (OPF2_NS, name) -def DC(name): +def DC(name): return '{%s}%s' % (DC11_NS, name) -def XSI(name): +def XSI(name): return '{%s}%s' % (XSI_NS, name) -def DCTERMS(name): +def DCTERMS(name): return '{%s}%s' % (DCTERMS_NS, name) -def NCX(name): +def NCX(name): return '{%s}%s' % (NCX_NS, name) -def SVG(name): +def SVG(name): return '{%s}%s' % (SVG_NS, name) -def XLINK(name): +def XLINK(name): return '{%s}%s' % (XLINK_NS, name) -def CALIBRE(name): +def CALIBRE(name): return '{%s}%s' % (CALIBRE_NS, name) def LINK_SELECTORS(): diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index e4a0cfd7fe..574d57f5fb 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -17,6 +17,7 @@ import types import re import copy from itertools import izip +from xml.dom import SyntaxErr as CSSSyntaxError import cssutils from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ CSSValueList, cssproperties @@ -288,15 +289,19 @@ class Style(object): def _update_cssdict(self, cssdict): self._style.update(cssdict) - + def _apply_style_attr(self): attrib = self._element.attrib - if 'style' in attrib: - css = attrib['style'].split(';') - css = filter(None, map(lambda x: x.strip(), css)) + if 'style' not in attrib: + return + css = attrib['style'].split(';') + css = filter(None, (x.strip() for x in css)) + try: style = CSSStyleDeclaration('; '.join(css)) - self._style.update(self._stylizer.flatten_style(style)) - + except CSSSyntaxError: + return + self._style.update(self._stylizer.flatten_style(style)) + def _has_parent(self): return (self._element.getparent() is not None) diff --git a/src/calibre/gui2/images/news/starbulletin.png b/src/calibre/gui2/images/news/starbulletin.png new file mode 100644 index 0000000000..bb3afd636a Binary files /dev/null and b/src/calibre/gui2/images/news/starbulletin.png differ diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index e6475dd020..b88842706a 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -369,13 +369,14 @@ class Main(MainWindow, Ui_MainWindow): if r == QSystemTrayIcon.Trigger: if self.isVisible(): for window in QApplication.topLevelWidgets(): - if isinstance(window, (MainWindow, QDialog)): + if isinstance(window, (MainWindow, QDialog)) and window.isVisible(): window.hide() + setattr(window, '__systray_minimized', True) else: for window in QApplication.topLevelWidgets(): - if isinstance(window, (MainWindow, QDialog)): - if window not in (self.device_error_dialog, self.jobs_dialog): - window.show() + if getattr(window, '__systray_minimized', False): + window.show() + setattr(window, '__systray_minimized', False) def do_default_sync(self, checked): diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 14cbbfb634..161666d45d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -27,6 +27,7 @@ recipe_modules = ['recipe_' + r for r in ( 'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english', 'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda', 'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz', + 'honoluluadvertiser', 'starbulletin', 'exiled', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_ambito.py b/src/calibre/web/feeds/recipes/recipe_ambito.py index 17d33a1cde..e6e3c224e9 100644 --- a/src/calibre/web/feeds/recipes/recipe_ambito.py +++ b/src/calibre/web/feeds/recipes/recipe_ambito.py @@ -1,31 +1,38 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' ambito.com ''' - -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class Ambito(BasicNewsRecipe): title = 'Ambito.com' __author__ = 'Darko Miletic' - description = 'Informacion Libre las 24 horas' + description = 'Informacion Libre las 24 horas' + publisher = 'Ambito.com' + category = 'news, politics, Argentina' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = False - encoding = 'iso--8859-1' - language = _('Spanish') + encoding = 'iso-8859-1' cover_url = 'http://www.ambito.com/img/logo_.jpg' - + remove_javascript = True + use_embedded_content = False + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Argentina' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [dict(name='div', attrs={'align':'justify'})] + + remove_tags = [dict(name=['object','link'])] feeds = [ (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' ) @@ -43,3 +50,12 @@ class Ambito(BasicNewsRecipe): def print_version(self, url): return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?') + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_b92.py b/src/calibre/web/feeds/recipes/recipe_b92.py index c20bfab8dc..611647620b 100644 --- a/src/calibre/web/feeds/recipes/recipe_b92.py +++ b/src/calibre/web/feeds/recipes/recipe_b92.py @@ -7,25 +7,33 @@ b92.net ''' import re -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe + class B92(BasicNewsRecipe): - title = u'B92' + title = 'B92' __author__ = 'Darko Miletic' - language = _('Serbian') description = 'Dnevne vesti iz Srbije i sveta' - oldest_article = 7 + oldest_article = 2 + publisher = 'B92.net' + category = 'news, politics, Serbia' max_articles_per_feed = 100 + remove_javascript = True no_stylesheets = True use_embedded_content = False cover_url = 'http://static.b92.net/images/fp/logo.gif' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ] + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Serbia' - , '--publisher', 'B92' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] feeds = [ (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml') @@ -44,3 +52,16 @@ class B92(BasicNewsRecipe): if biz: nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id return nurl + + def preprocess_html(self, soup): + soup.html['xml:lang'] = 'sr-Latn' + soup.html['lang'] = 'sr-Latn' + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll(name='img',align=True): + del item['align'] + item.insert(0,'

') + return soup + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_blic.py b/src/calibre/web/feeds/recipes/recipe_blic.py index bee95b8424..ae75394fec 100644 --- a/src/calibre/web/feeds/recipes/recipe_blic.py +++ b/src/calibre/web/feeds/recipes/recipe_blic.py @@ -5,31 +5,49 @@ __copyright__ = '2008, Darko Miletic ' ''' blic.rs ''' -import string,re -from calibre.web.feeds.news import BasicNewsRecipe +import re + +from calibre.web.feeds.news import BasicNewsRecipe + class Blic(BasicNewsRecipe): title = u'Blic' - __author__ = 'Darko Miletic' - description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' - oldest_article = 7 + __author__ = u'Darko Miletic' + description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' + publisher = 'RINGIER d.o.o.' + category = 'news, politics, Serbia' + oldest_article = 2 max_articles_per_feed = 100 + remove_javascript = True no_stylesheets = True use_embedded_content = False - cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Serbia' - , '--publisher', 'Blic' + '--comment', description + , '--category', category + , '--publisher', publisher ] - + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - keep_only_tags = [ dict(name='div', attrs={'class':'single_news'}) ] + keep_only_tags = [dict(name='div', attrs={'class':'single_news'})] - feeds = [ (u'Vesti', u'http://www.blic.rs/rssall.php')] + feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')] + remove_tags = [dict(name=['object','link'])] + def print_version(self, url): start_url, question, rest_url = url.partition('?') return u'http://www.blic.rs/_print.php?' + rest_url + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_clarin.py b/src/calibre/web/feeds/recipes/recipe_clarin.py index cd72163c88..146719cc8b 100644 --- a/src/calibre/web/feeds/recipes/recipe_clarin.py +++ b/src/calibre/web/feeds/recipes/recipe_clarin.py @@ -1,31 +1,35 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' clarin.com ''' from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe + class Clarin(BasicNewsRecipe): title = 'Clarin' __author__ = 'Darko Miletic' description = 'Noticias de Argentina y mundo' + publisher = 'Grupo Clarin' + category = 'news, politics, Argentina' oldest_article = 2 max_articles_per_feed = 100 - language = _('Spanish') use_embedded_content = False no_stylesheets = True cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg') - + remove_javascript = True + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Argentina' - , '--publisher', 'Grupo Clarin' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' remove_tags = [ dict(name='a' , attrs={'class':'Imp' }) @@ -49,3 +53,12 @@ class Clarin(BasicNewsRecipe): rest = artl.partition('-0')[-1] lmain = rest.partition('.')[0] return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_danas.py b/src/calibre/web/feeds/recipes/recipe_danas.py index 54f75b8ad2..f9c05e7b20 100644 --- a/src/calibre/web/feeds/recipes/recipe_danas.py +++ b/src/calibre/web/feeds/recipes/recipe_danas.py @@ -5,37 +5,47 @@ __copyright__ = '2008, Darko Miletic ' ''' danas.rs ''' -import string,re -from calibre.web.feeds.news import BasicNewsRecipe +import re +from calibre.web.feeds.news import BasicNewsRecipe + class Danas(BasicNewsRecipe): - title = 'Danas' + title = u'Danas' __author__ = 'Darko Miletic' - description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.' + description = 'Vesti' + publisher = 'Danas d.o.o.' + category = 'news, politics, Serbia' oldest_article = 2 max_articles_per_feed = 100 - no_stylesheets = True + no_stylesheets = False + remove_javascript = True use_embedded_content = False - cover_url = 'http://www.danas.rs/images/basic/danas.gif' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Serbia' - , '--publisher', 'Danas' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - keep_only_tags = [ dict(name='div', attrs={'id':'left'}) ] + keep_only_tags = [dict(name='div', attrs={'id':'left'})] remove_tags = [ - dict(name='div', attrs={'class':'width_1_4' }) - ,dict(name='div', attrs={'class':'metaClanka' }) - ,dict(name='div', attrs={'id':'comments' }) - ,dict(name='div', attrs={'class':'baner' }) - ,dict(name='div', attrs={'class':'slikaClanka'}) + dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']}) + ,dict(name='div', attrs={'id':'comments'}) + ,dict(name=['object','link']) ] - feeds = [(u'Vesti', u'http://www.danas.rs/rss/rss.asp')] + feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')] - def print_version(self, url): - return url + '&action=print' + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_de_standaard.py b/src/calibre/web/feeds/recipes/recipe_de_standaard.py index f247c14d5c..de456b9169 100644 --- a/src/calibre/web/feeds/recipes/recipe_de_standaard.py +++ b/src/calibre/web/feeds/recipes/recipe_de_standaard.py @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class DeStandaard(BasicNewsRecipe): title = u'De Standaard' __author__ = u'Darko Miletic' - language = _('French') + language = _('Dutch') description = u'News from Belgium' oldest_article = 7 max_articles_per_feed = 100 diff --git a/src/calibre/web/feeds/recipes/recipe_demorgen_be.py b/src/calibre/web/feeds/recipes/recipe_demorgen_be.py index 8de95bd4aa..5dc912aa3e 100644 --- a/src/calibre/web/feeds/recipes/recipe_demorgen_be.py +++ b/src/calibre/web/feeds/recipes/recipe_demorgen_be.py @@ -13,6 +13,7 @@ class DeMorganBe(BasicNewsRecipe): __author__ = u'Darko Miletic' description = u'News from Belgium' oldest_article = 7 + language = _('Dutch') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False diff --git a/src/calibre/web/feeds/recipes/recipe_el_mercurio_chile.py b/src/calibre/web/feeds/recipes/recipe_el_mercurio_chile.py index 0b7d994b34..fb11d54072 100644 --- a/src/calibre/web/feeds/recipes/recipe_el_mercurio_chile.py +++ b/src/calibre/web/feeds/recipes/recipe_el_mercurio_chile.py @@ -5,31 +5,36 @@ __copyright__ = '2009, Darko Miletic ' ''' emol.com ''' -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe + class ElMercurio(BasicNewsRecipe): title = 'El Mercurio online' - language = _('Spanish') __author__ = 'Darko Miletic' - description = 'El sitio de noticias online de Chile' + description = 'El sitio de noticias online de Chile' + publisher = 'El Mercurio' + category = 'news, politics, Chile' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif' - + remove_javascript = True + use_embedded_content = False + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Chile' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [ dict(name='div', attrs={'class':'despliegue-txt_750px'}) ,dict(name='div', attrs={'id':'div_cuerpo_participa'}) ] - remove_tags = [ dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'}) @@ -45,4 +50,12 @@ class ElMercurio(BasicNewsRecipe): ,(u'Tecnologia', u'http://www.emol.com/rss20/rss.asp?canal=5') ,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7') ] + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_elargentino.py b/src/calibre/web/feeds/recipes/recipe_elargentino.py index ef79c96baa..1801c81b81 100644 --- a/src/calibre/web/feeds/recipes/recipe_elargentino.py +++ b/src/calibre/web/feeds/recipes/recipe_elargentino.py @@ -1,30 +1,34 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' elargentino.com ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class ElArgentino(BasicNewsRecipe): title = 'ElArgentino.com' __author__ = 'Darko Miletic' - description = 'Informacion Libre las 24 horas' - language = _('Spanish') + description = 'Informacion Libre las 24 horas' + publisher = 'ElArgentino.com' + category = 'news, politics, Argentina' oldest_article = 2 max_articles_per_feed = 100 + remove_javascript = True no_stylesheets = True use_embedded_content = False encoding = 'utf8' cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png' html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Argentina' - , '--publisher' , 'ElArgentino.com' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' remove_tags = [ dict(name='div', attrs={'id':'noprint' }) @@ -50,7 +54,10 @@ class ElArgentino(BasicNewsRecipe): return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id def preprocess_html(self, soup): - mtag = '' + mtag = '\n\n' soup.head.insert(0,mtag) - soup.prettify() + for item in soup.findAll(style=True): + del item['style'] return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_elmundo.py b/src/calibre/web/feeds/recipes/recipe_elmundo.py index 9fd6eefb35..3ecedb5822 100644 --- a/src/calibre/web/feeds/recipes/recipe_elmundo.py +++ b/src/calibre/web/feeds/recipes/recipe_elmundo.py @@ -6,41 +6,55 @@ __copyright__ = '2009, Darko Miletic ' elmundo.es ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class ElMundo(BasicNewsRecipe): title = 'El Mundo' __author__ = 'Darko Miletic' description = 'News from Spain' - language = _('Spanish') + publisher = 'El Mundo' + category = 'news, politics, Spain' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'iso8859_15' cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Spain' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - keep_only_tags = [dict(name='div', attrs={'class':'noticia'})] - + + keep_only_tags = [ + dict(name='div', attrs={'id':['bloqueprincipal','noticia']}) + ,dict(name='div', attrs={'class':['contenido_noticia_01']}) + ] remove_tags = [ - dict(name='div', attrs={'class':['herramientas','publicidad_google','video','herramientasarriba','contenido_noticia_02']}) + dict(name='div', attrs={'class':['herramientas','publicidad_google']}) ,dict(name='div', attrs={'id':'modulo_multimedia' }) - ,dict(name=['object','script','link', 'a']) - ,dict(name='ul', attrs={'class':'herramientas'}) + ,dict(name='ul', attrs={'class':'herramientas' }) + ,dict(name=['object','link']) ] feeds = [ (u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' ) - ,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76') ,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' ) ,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' ) ,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' ) ,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' ) ,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26') + ,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76') ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_estadao.py b/src/calibre/web/feeds/recipes/recipe_estadao.py index 97fcea4b8a..4b23fdb235 100644 --- a/src/calibre/web/feeds/recipes/recipe_estadao.py +++ b/src/calibre/web/feeds/recipes/recipe_estadao.py @@ -6,26 +6,29 @@ __copyright__ = '2009, Darko Miletic ' estadao.com.br ''' -from calibre.web.feeds.news import BasicNewsRecipe - - +from calibre.web.feeds.news import BasicNewsRecipe + class Estadao(BasicNewsRecipe): title = 'O Estado de S. Paulo' __author__ = 'Darko Miletic' - description = 'News from Brasil' - language = _('Spanish') + description = 'News from Brasil in Portugese' + publisher = 'O Estado de S. Paulo' + category = 'news, politics, Brasil' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'utf8' cover_url = 'http://www.estadao.com.br/img/logo_estadao.png' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Brasil' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'id':'c1'})] @@ -52,4 +55,8 @@ class Estadao(BasicNewsRecipe): ifr = soup.find('iframe') if ifr: ifr.extract() + for item in soup.findAll(style=True): + del item['style'] return soup + + language = _('Portugese') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_exiled.py b/src/calibre/web/feeds/recipes/recipe_exiled.py new file mode 100644 index 0000000000..e70e047e0d --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_exiled.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +exiledonline.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Exiled(BasicNewsRecipe): + title = 'Exiled Online' + __author__ = 'Darko Miletic' + description = "Mankind's only alternative since 1997 - Formerly known as The eXile" + publisher = 'Exiled Online' + language = _('English') + category = 'news, politics, international' + oldest_article = 15 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf8' + remove_javascript = True + cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher' , publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [dict(name='div', attrs={'id':'main'})] + + remove_tags = [ + dict(name=['object','link']) + ,dict(name='div', attrs={'class':'info'}) + ,dict(name='div', attrs={'id':['comments','navig']}) + ] + + + feeds = [(u'Articles', u'http://exiledonline.com/feed/' )] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + mtag = '\n\n\n' + soup.head.insert(0,mtag) + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_granma.py b/src/calibre/web/feeds/recipes/recipe_granma.py index 43cbd32ae1..66ebba1d64 100644 --- a/src/calibre/web/feeds/recipes/recipe_granma.py +++ b/src/calibre/web/feeds/recipes/recipe_granma.py @@ -7,27 +7,30 @@ granma.cubaweb.cu ''' import urllib - -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class Granma(BasicNewsRecipe): title = 'Diario Granma' __author__ = 'Darko Miletic' - language = _('Spanish') description = 'Organo oficial del Comite Central del Partido Comunista de Cuba' + publisher = 'Granma' + category = 'news, politics, Cuba' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Cuba' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher , '--ignore-tables' ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='table', attrs={'height':'466'})] @@ -35,9 +38,15 @@ class Granma(BasicNewsRecipe): def preprocess_html(self, soup): - del soup.body.table['style'] - rtag = soup.find('td', attrs={'height':'458'}) - if rtag: - del rtag['style'] + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll('table'): + if item.has_key('width'): + del item['width'] + if item.has_key('height'): + del item['height'] + for item in soup.findAll(style=True): + del item['style'] return soup + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_harpers_full.py b/src/calibre/web/feeds/recipes/recipe_harpers_full.py index adf0bf82a3..72e633bde0 100644 --- a/src/calibre/web/feeds/recipes/recipe_harpers_full.py +++ b/src/calibre/web/feeds/recipes/recipe_harpers_full.py @@ -1,62 +1,80 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' -''' -harpers.org - paid subscription/ printed issue articles -This recipe only get's article's published in text format -images and pdf's are ignored -''' - -from calibre import strftime +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008-2009, Darko Miletic ' +''' +harpers.org - paid subscription/ printed issue articles +This recipe only get's article's published in text format +images and pdf's are ignored +''' + +from calibre import strftime + from calibre.web.feeds.news import BasicNewsRecipe -class Harpers_full(BasicNewsRecipe): - title = u"Harper's Magazine - articles from printed edition" - __author__ = u'Darko Miletic' - description = u"Harper's Magazine: Founded June 1850." - language = _('English') - oldest_article = 30 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - simultaneous_downloads = 1 - delay = 1 - needs_subscription = True - INDEX = strftime('http://www.harpers.org/archive/%Y/%m') - LOGIN = 'http://www.harpers.org' - cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif') - - keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ] - remove_tags = [ - dict(name='table', attrs={'class':'rcnt'}) - ,dict(name='table', attrs={'class':'rcnt topline'}) - ] - - def get_browser(self): - br = BasicNewsRecipe.get_browser() - if self.username is not None and self.password is not None: - br.open(self.LOGIN) - br.select_form(nr=1) - br['handle' ] = self.username - br['password'] = self.password - br.submit() - return br +class Harpers_full(BasicNewsRecipe): + title = u"Harper's Magazine - articles from printed edition" + __author__ = u'Darko Miletic' + description = u"Harper's Magazine: Founded June 1850." + publisher = "Harpers's" + category = 'news, politics, USA' + oldest_article = 30 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + simultaneous_downloads = 1 + delay = 1 + needs_subscription = True + INDEX = strftime('http://www.harpers.org/archive/%Y/%m') + LOGIN = 'http://www.harpers.org' + cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif') + remove_javascript = True + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ] + remove_tags = [ + dict(name='table', attrs={'class':'rcnt'}) + ,dict(name='table', attrs={'class':'rcnt topline'}) + ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open(self.LOGIN) + br.select_form(nr=1) + br['handle' ] = self.username + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + articles = [] + print 'Processing ' + self.INDEX + soup = self.index_to_soup(self.INDEX) + for item in soup.findAll('div', attrs={'class':'title'}): + text_link = item.parent.find('img',attrs={'alt':'Text'}) + if text_link: + url = self.LOGIN + item.a['href'] + title = item.a.contents[0] + date = strftime(' %B %Y') + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':'' + }) + return [(soup.head.title.string, articles)] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup - def parse_index(self): - articles = [] - print 'Processing ' + self.INDEX - soup = self.index_to_soup(self.INDEX) - for item in soup.findAll('div', attrs={'class':'title'}): - text_link = item.parent.find('img',attrs={'alt':'Text'}) - if text_link: - url = self.LOGIN + item.a['href'] - title = item.a.contents[0] - date = strftime(' %B %Y') - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':'' - }) - return [(soup.head.title.string, articles)] \ No newline at end of file + language = _('English') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py b/src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py new file mode 100644 index 0000000000..ef36a700ed --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +honoluluadvertiser.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Honoluluadvertiser(BasicNewsRecipe): + title = 'Honolulu Advertiser' + __author__ = 'Darko Miletic' + description = "Latest national and local Hawaii sports news from The Honolulu Advertiser." + publisher = 'Honolulu Advertiser' + category = 'news, Honolulu, Hawaii' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + remove_javascript = True + cover_url = 'http://www.honoluluadvertiser.com/graphics/branding.gif' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher' , publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [dict(name='td')] + + remove_tags = [dict(name=['object','link'])] + + + feeds = [ + (u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' ) + ,(u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML' ) + ,(u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML' ) + ,(u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML' ) + ,(u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML' ) + ,(u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML' ) + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + mtag = '\n\n' + soup.head.insert(0,mtag) + return soup + + def print_version(self, url): + ubody, sep, rest = url.rpartition('/-1/') + root, sep2, article_id = ubody.partition('/article/') + return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart' + diff --git a/src/calibre/web/feeds/recipes/recipe_infobae.py b/src/calibre/web/feeds/recipes/recipe_infobae.py index 5acbcfa763..40e720f94c 100644 --- a/src/calibre/web/feeds/recipes/recipe_infobae.py +++ b/src/calibre/web/feeds/recipes/recipe_infobae.py @@ -1,34 +1,36 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' infobae.com ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class Infobae(BasicNewsRecipe): title = 'Infobae.com' __author__ = 'Darko Miletic' - description = 'Informacion Libre las 24 horas' + description = 'Informacion Libre las 24 horas' + publisher = 'Infobae.com' + category = 'news, politics, Argentina' oldest_article = 2 - language = _('Spanish') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'iso-8859-1' cover_url = 'http://www.infobae.com/imgs/header/header.gif' - - html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Argentina' - , '--publisher' , 'Infobae.com' - ] - - + remove_javascript = True - feeds = [ + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + feeds = [ (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) ,(u'Salud' , u'http://www.infobae.com/adjuntos/html/RSS/salud.xml' ) ,(u'Tecnologia', u'http://www.infobae.com/adjuntos/html/RSS/tecnologia.xml') @@ -37,5 +39,14 @@ class Infobae(BasicNewsRecipe): def print_version(self, url): main, sep, article_part = url.partition('contenidos/') - article_id, rsep, rrest = article_part.partition('-') + article_id, rsep, rrest = article_part.partition('-') return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id + + def preprocess_html(self, soup): + mtag = '\n\n' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_jb_online.py b/src/calibre/web/feeds/recipes/recipe_jb_online.py index c94ab25f05..4ac4b70836 100644 --- a/src/calibre/web/feeds/recipes/recipe_jb_online.py +++ b/src/calibre/web/feeds/recipes/recipe_jb_online.py @@ -6,25 +6,29 @@ __copyright__ = '2009, Darko Miletic ' jbonline.terra.com.br ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class JBOnline(BasicNewsRecipe): title = 'Jornal Brasileiro Online' __author__ = 'Darko Miletic' - description = 'News from Brasil' + description = 'News from Brasil' + publisher = 'Jornal Brasileiro' + category = 'news, politics, Brasil' oldest_article = 2 - language = _('Spanish') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Brasil' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})] @@ -36,7 +40,8 @@ class JBOnline(BasicNewsRecipe): ifr = soup.find('iframe') if ifr: ifr.extract() - item = soup.find('div', attrs={'id':'corpoNoticia'}) - if item: - del item['style'] + for item in soup.findAll(style=True): + del item['style'] return soup + + language = _('Portugese') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_jutarnji.py b/src/calibre/web/feeds/recipes/recipe_jutarnji.py index f2504a78c1..194d2163f9 100644 --- a/src/calibre/web/feeds/recipes/recipe_jutarnji.py +++ b/src/calibre/web/feeds/recipes/recipe_jutarnji.py @@ -6,28 +6,35 @@ __copyright__ = '2008, Darko Miletic ' jutarnji.hr ''' -import string, re -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup +import re +from calibre.web.feeds.news import BasicNewsRecipe + class Jutarnji(BasicNewsRecipe): - title = 'Jutarnji' - __author__ = 'Darko Miletic' - description = 'Online izdanje Jutarnjeg lista' + title = u'Jutarnji' + __author__ = u'Darko Miletic' + description = u'Hrvatski portal' + publisher = 'Jutarnji.hr' + category = 'news, politics, Croatia' oldest_article = 2 max_articles_per_feed = 100 simultaneous_downloads = 1 delay = 1 no_stylesheets = True use_embedded_content = False + remove_javascript = True encoding = 'cp1250' - cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Croatia' - , '--publisher', 'Europapress holding d.o.o.' - ] + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ @@ -49,11 +56,16 @@ class Jutarnji(BasicNewsRecipe): def print_version(self, url): main, split, rest = url.partition('.jl') rmain, rsplit, rrest = main.rpartition(',') - return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest + return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest def preprocess_html(self, soup): mtag = '' soup.head.insert(0,mtag) - soup.prettify() + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll(width=True): + del item['width'] return soup \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py b/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py index 5fa9f45e41..eea510a7cd 100644 --- a/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py +++ b/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py @@ -7,26 +7,30 @@ juventudrebelde.cu ''' from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class Juventudrebelde(BasicNewsRecipe): title = 'Juventud Rebelde' __author__ = 'Darko Miletic' - description = 'Diario de la Juventud Cubana' + description = 'Diario de la Juventud Cubana' + publisher = 'Juventud rebelde' + category = 'news, politics, Cuba' oldest_article = 2 - language = _('Spanish') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg') - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Cuba' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher , '--ignore-tables' ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'id':'noticia'})] @@ -40,4 +44,11 @@ class Juventudrebelde(BasicNewsRecipe): ,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' ) ] - + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_juventudrebelde_english.py b/src/calibre/web/feeds/recipes/recipe_juventudrebelde_english.py index e7c1002323..5bae2b6e9c 100644 --- a/src/calibre/web/feeds/recipes/recipe_juventudrebelde_english.py +++ b/src/calibre/web/feeds/recipes/recipe_juventudrebelde_english.py @@ -5,30 +5,40 @@ __copyright__ = '2008, Darko Miletic ' ''' juventudrebelde.co.cu ''' -from calibre import strftime - -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe + class Juventudrebelde_english(BasicNewsRecipe): title = 'Juventud Rebelde in english' __author__ = 'Darko Miletic' - description = 'The newspaper of Cuban Youth' - language = _('English') + description = 'The newspaper of Cuban Youth' + publisher = 'Juventud Rebelde' + category = 'news, politics, Cuba' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'iso-8859-1' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Cuba' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher , '--ignore-tables' ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'class':'read'})] feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )] + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + language = _('English') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_la_cuarta.py b/src/calibre/web/feeds/recipes/recipe_la_cuarta.py index e5576cd442..17bc708245 100644 --- a/src/calibre/web/feeds/recipes/recipe_la_cuarta.py +++ b/src/calibre/web/feeds/recipes/recipe_la_cuarta.py @@ -6,30 +6,33 @@ __copyright__ = '2009, Darko Miletic ' lacuarta.cl ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class LaCuarta(BasicNewsRecipe): title = 'La Cuarta' __author__ = 'Darko Miletic' - description = 'El sitio de noticias online de Chile' + description = 'La Cuarta Cibernetica: El Diario popular' + publisher = 'CODISA, Consorcio Digital S.A.' + category = 'news, politics, entertainment, Chile' oldest_article = 2 - language = _('Spanish') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Chile' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ] remove_tags = [ - dict(name='script') - ,dict(name='ul') + dict(name='ul') ,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']}) ,dict(name='div', attrs={'class':['par ad-1','par ad-2']}) ,dict(name='input') @@ -37,7 +40,14 @@ class LaCuarta(BasicNewsRecipe): ,dict(name='strong', text='PUBLICIDAD') ] + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')] + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_la_segunda.py b/src/calibre/web/feeds/recipes/recipe_la_segunda.py index 7f5415806d..d049d9c92b 100644 --- a/src/calibre/web/feeds/recipes/recipe_la_segunda.py +++ b/src/calibre/web/feeds/recipes/recipe_la_segunda.py @@ -6,26 +6,29 @@ __copyright__ = '2009, Darko Miletic ' lasegunda.com ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class LaSegunda(BasicNewsRecipe): title = 'La Segunda' __author__ = 'Darko Miletic' - description = 'El sitio de noticias online de Chile' - language = _('Spanish') + description = 'El sitio de noticias online de Chile' + publisher = 'La Segunda' + category = 'news, politics, Chile' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Chile' - , '--publisher' , title - , '--ignore-tables' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='table')] @@ -45,4 +48,14 @@ class LaSegunda(BasicNewsRecipe): def print_version(self, url): rest, sep, article_id = url.partition('index.asp?idnoticia=') return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(name='table', width=True): + del item['width'] + for item in soup.findAll(style=True): + del item['style'] + return soup + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_la_tercera.py b/src/calibre/web/feeds/recipes/recipe_la_tercera.py index 65b0e630df..a0a2f94ee3 100644 --- a/src/calibre/web/feeds/recipes/recipe_la_tercera.py +++ b/src/calibre/web/feeds/recipes/recipe_la_tercera.py @@ -6,26 +6,30 @@ __copyright__ = '2009, Darko Miletic ' latercera.com ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class LaTercera(BasicNewsRecipe): title = 'La Tercera' __author__ = 'Darko Miletic' - description = 'El sitio de noticias online de Chile' + description = 'El sitio de noticias online de Chile' + publisher = 'La Tercera' + category = 'news, politics, Chile' oldest_article = 2 - language = _('Spanish') max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = False encoding = 'cp1252' - + remove_javascript = True + use_embedded_content = False + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Chile' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - keep_only_tags = [dict(name='div', attrs={'class':'span-16 articulo border'}) ] + keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ] remove_tags = [ dict(name='script') @@ -50,4 +54,11 @@ class LaTercera(BasicNewsRecipe): ,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657') ] + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_lanacion.py b/src/calibre/web/feeds/recipes/recipe_lanacion.py index 6cf2afdc89..6675fc9b21 100644 --- a/src/calibre/web/feeds/recipes/recipe_lanacion.py +++ b/src/calibre/web/feeds/recipes/recipe_lanacion.py @@ -1,29 +1,32 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' lanacion.com.ar ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class Lanacion(BasicNewsRecipe): title = 'La Nacion' __author__ = 'Darko Miletic' - description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!' + description = 'Noticias de Argentina y el resto del mundo' + publisher = 'La Nacion' + category = 'news, politics, Argentina' oldest_article = 2 - language = _('Spanish') max_articles_per_feed = 100 - no_stylesheets = True use_embedded_content = False + remove_javascript = True + no_stylesheets = True html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Argentina' - , '--publisher', 'La Nacion SA' - ] + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})] remove_tags = [ @@ -47,11 +50,11 @@ class Lanacion(BasicNewsRecipe): ,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' ) ] - def get_cover_url(self): - index = 'http://www.lanacion.com.ar' - cover_url = None - soup = self.index_to_soup(index) - cover_item = soup.find('img',attrs={'class':'logo'}) - if cover_item: - cover_url = index + cover_item['src'] - return cover_url + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_lanacion_chile.py b/src/calibre/web/feeds/recipes/recipe_lanacion_chile.py index 81f31f07d8..8bd521df4b 100644 --- a/src/calibre/web/feeds/recipes/recipe_lanacion_chile.py +++ b/src/calibre/web/feeds/recipes/recipe_lanacion_chile.py @@ -7,25 +7,29 @@ lanacion.cl ''' import urllib -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class LaNacionChile(BasicNewsRecipe): title = 'La Nacion Chile' __author__ = 'Darko Miletic' - description = 'El sitio de noticias online de Chile' + description = 'El sitio de noticias online de Chile' + publisher = 'La Nacion' + category = 'news, politics, Chile' oldest_article = 2 - language = _('Spanish') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Chile' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'class':'bloque'})] @@ -41,5 +45,10 @@ class LaNacionChile(BasicNewsRecipe): item = soup.find('a', attrs={'href':'javascript:window.close()'}) if item: item.extract() + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] return soup + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_laprensa.py b/src/calibre/web/feeds/recipes/recipe_laprensa.py index 7c2567f8e6..f2064c1f6b 100644 --- a/src/calibre/web/feeds/recipes/recipe_laprensa.py +++ b/src/calibre/web/feeds/recipes/recipe_laprensa.py @@ -1,31 +1,35 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' laprensa.com.ar ''' import urllib -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class LaPrensa(BasicNewsRecipe): title = 'La Prensa' __author__ = 'Darko Miletic' - description = 'Informacion Libre las 24 horas' + description = 'Informacion Libre las 24 horas' + publisher = 'La Prensa' + category = 'news, politics, Argentina' oldest_article = 7 - language = _('Spanish') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Argentina' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' feeds = [ (u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' ) @@ -47,5 +51,10 @@ class LaPrensa(BasicNewsRecipe): def preprocess_html(self, soup): del soup.body['onload'] + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] return soup + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nin.py b/src/calibre/web/feeds/recipes/recipe_nin.py index 65195b5616..d180f2b221 100644 --- a/src/calibre/web/feeds/recipes/recipe_nin.py +++ b/src/calibre/web/feeds/recipes/recipe_nin.py @@ -7,12 +7,15 @@ nin.co.yu ''' import re, urllib -from calibre.web.feeds.news import BasicNewsRecipe -class Nin(BasicNewsRecipe): +from calibre.web.feeds.news import BasicNewsRecipe + +class Nin(BasicNewsRecipe): title = 'NIN online' __author__ = 'Darko Miletic' description = 'Nedeljne informativne novine' + publisher = 'NIN' + category = 'news, politics, Serbia' no_stylesheets = True oldest_article = 15 simultaneous_downloads = 1 @@ -22,11 +25,17 @@ class Nin(BasicNewsRecipe): PREFIX = 'http://www.nin.co.yu' INDEX = PREFIX + '/?change_lang=ls' LOGIN = PREFIX + '/?logout=true' + remove_javascript = True + use_embedded_content = False + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, politics, Serbia' - , '--publisher' , 'NIN' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -53,3 +62,12 @@ class Nin(BasicNewsRecipe): if link_item: cover_url = self.PREFIX + link_item['src'] return cover_url + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_novosti.py b/src/calibre/web/feeds/recipes/recipe_novosti.py index 4ff225fd0a..136302c573 100644 --- a/src/calibre/web/feeds/recipes/recipe_novosti.py +++ b/src/calibre/web/feeds/recipes/recipe_novosti.py @@ -5,31 +5,45 @@ __copyright__ = '2008, Darko Miletic ' ''' novosti.rs ''' -import string,re -from calibre.web.feeds.news import BasicNewsRecipe +import re + +from calibre.web.feeds.news import BasicNewsRecipe + class Novosti(BasicNewsRecipe): - title = 'Vecernje Novosti' - __author__ = 'Darko Miletic' - description = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti' + title = u'Vecernje Novosti' + __author__ = u'Darko Miletic' + description = u'Vesti' + publisher = 'Kompanija Novosti' + category = 'news, politics, Serbia' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + encoding = 'utf8' + remove_javascript = True + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Serbia' - , '--publisher', 'Novosti AD' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - keep_only_tags = [ dict(name='div', attrs={'class':'jednaVest'}) ] - remove_tags_after = dict(name='div', attrs={'class':'info_bottom'}) - remove_tags = [ - dict(name='div', attrs={'class':'info'}) - ,dict(name='div', attrs={'class':'info_bottom'}) - ] + keep_only_tags = [dict(name='div', attrs={'class':'jednaVest'})] + remove_tags = [dict(name='div', attrs={'class':['info','info_bottom','clip_div']})] - feeds = [ (u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] + feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nspm.py b/src/calibre/web/feeds/recipes/recipe_nspm.py index d828636545..4cc6d50ca0 100644 --- a/src/calibre/web/feeds/recipes/recipe_nspm.py +++ b/src/calibre/web/feeds/recipes/recipe_nspm.py @@ -6,35 +6,55 @@ __copyright__ = '2008, Darko Miletic ' nspm.rs ''' -import string,re -from calibre.web.feeds.news import BasicNewsRecipe +import re +from calibre.web.feeds.news import BasicNewsRecipe + class Nspm(BasicNewsRecipe): title = u'Nova srpska politicka misao' __author__ = 'Darko Miletic' description = 'Casopis za politicku teoriju i drustvena istrazivanja' + publisher = 'NSPM' + category = 'news, politics, Serbia' oldest_article = 7 - language = _('Serbian') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False INDEX = 'http://www.nspm.rs/?alphabet=l' - cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg' + encoding = 'utf8' + remove_javascript = True + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, politics, Serbia' - , '--publisher', 'IIC NSPM' + '--comment', description + , '--category', category + , '--publisher', publisher + , '--ignore-tables' ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - + remove_tags = [dict(name='a')] + def get_browser(self): br = BasicNewsRecipe.get_browser() br.open(self.INDEX) return br - feeds = [ (u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')] + feeds = [(u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')] def print_version(self, url): return url.replace('.html','/stampa.html') + + def preprocess_html(self, soup): + soup.html['xml:lang'] = 'sr-Latn-RS' + soup.html['lang'] = 'sr-Latn-RS' + ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'}) + if ftag: + ftag['content'] = 'sr-Latn-RS' + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_o_globo.py b/src/calibre/web/feeds/recipes/recipe_o_globo.py index 1331ea86a3..f4f78d54b8 100644 --- a/src/calibre/web/feeds/recipes/recipe_o_globo.py +++ b/src/calibre/web/feeds/recipes/recipe_o_globo.py @@ -6,25 +6,29 @@ __copyright__ = '2009, Darko Miletic ' oglobo.globo.com ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class OGlobo(BasicNewsRecipe): title = 'O Globo' __author__ = 'Darko Miletic' - description = 'News from Brasil' + description = 'News from Brasil' + publisher = 'O Globo' + category = 'news, politics, Brasil' oldest_article = 2 max_articles_per_feed = 100 - language = _('Spanish') no_stylesheets = True use_embedded_content = False encoding = 'cp1252' cover_url = 'http://oglobo.globo.com/_img/o-globo.png' - + remove_javascript = True + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Brasil' - , '--publisher' , title + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})] @@ -56,3 +60,10 @@ class OGlobo(BasicNewsRecipe): ,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml') ,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml') ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Portugese') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_pagina12.py b/src/calibre/web/feeds/recipes/recipe_pagina12.py index 8428a9a35f..b821ed0b68 100644 --- a/src/calibre/web/feeds/recipes/recipe_pagina12.py +++ b/src/calibre/web/feeds/recipes/recipe_pagina12.py @@ -1,31 +1,36 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' pagina12.com.ar ''' from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe + class Pagina12(BasicNewsRecipe): title = u'Pagina/12' __author__ = 'Darko Miletic' description = 'Noticias de Argentina y el resto del mundo' - language = _('Spanish') + publisher = 'La Pagina S.A.' + category = 'news, politics, Argentina' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = False encoding = 'cp1252' cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg') - + remove_javascript = True + use_embedded_content = False + html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Argentina' - , '--publisher' , 'La Pagina S.A.' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' remove_tags = [ @@ -38,3 +43,12 @@ class Pagina12(BasicNewsRecipe): def print_version(self, url): return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/') + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_pescanik.py b/src/calibre/web/feeds/recipes/recipe_pescanik.py index 04d7d88803..e3385e02aa 100644 --- a/src/calibre/web/feeds/recipes/recipe_pescanik.py +++ b/src/calibre/web/feeds/recipes/recipe_pescanik.py @@ -6,30 +6,53 @@ __copyright__ = '2008, Darko Miletic ' pescanik.net ''' -import string,re -from calibre.web.feeds.news import BasicNewsRecipe +import re +from calibre.web.feeds.news import BasicNewsRecipe + class Pescanik(BasicNewsRecipe): title = 'Pescanik' __author__ = 'Darko Miletic' description = 'Pescanik' + publisher = 'Pescanik' + category = 'news, politics, Serbia' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - html2lrf_options = ['--base-font-size', '10'] - html2epub_options = 'base_font_size = "10pt"' + remove_javascript = True + encoding = 'utf8' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png" preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - remove_tags_after = dict(name='div', attrs={'class':'article_seperator'}) - - remove_tags = [dict(name='td' , attrs={'class':'buttonheading'})] + remove_tags = [ + dict(name='td' , attrs={'class':'buttonheading'}) + ,dict(name='span', attrs={'class':'article_seperator'}) + ,dict(name=['object','link']) + ] feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')] def print_version(self, url): - nurl = url.replace('http://pescanik.net/index.php','http://pescanik.net/index2.php') + nurl = url.replace('/index.php','/index2.php') return nurl + '&pop=1&page=0' + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_politika.py b/src/calibre/web/feeds/recipes/recipe_politika.py index 949a9b781d..1575d8984f 100644 --- a/src/calibre/web/feeds/recipes/recipe_politika.py +++ b/src/calibre/web/feeds/recipes/recipe_politika.py @@ -5,37 +5,61 @@ __copyright__ = '2008, Darko Miletic ' ''' politika.rs ''' -import string,re -from calibre.web.feeds.news import BasicNewsRecipe +import re +from calibre.web.feeds.news import BasicNewsRecipe + class Politika(BasicNewsRecipe): - title = 'Politika Online' + title = u'Politika Online' __author__ = 'Darko Miletic' - description = 'Najstariji dnevni list na Balkanu' + description = 'Najstariji dnevni list na Balkanu' + publisher = 'Politika novine i Magazini d.o.o' + category = 'news, politics, Serbia' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True - extra_css = '.content_center_border {text-align: left;}' use_embedded_content = False - cover_url = 'http://www.politika.rs:8080/images/politika.gif' + remove_javascript = True + encoding = 'utf8' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, Serbia' - , '--publisher', 'POLITIKA NOVINE I MAGAZINI d.o.o.' + '--comment', description + , '--category', category + , '--publisher', publisher ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - keep_only_tags = [ dict(name='div', attrs={'class':'contentcenter'}) ] - remove_tags_after = dict(name='div', attrs={'class':'datum_item_details'}) + keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})] + + remove_tags = [ + dict(name='div', attrs={'class':['send_print','txt-komentar']}) + ,dict(name=['object','link','a']) + ,dict(name='h1', attrs={'class':'box_header-tags'}) + ] + feeds = [ (u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' ) ,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' ) + ,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml') ,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' ) ,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' ) ,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' ) ,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' ) ,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' ) ] + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + ftag = soup.find('div',attrs={'class':'content_center_border'}) + if ftag: + ftag['align'] = 'left' + return soup diff --git a/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py b/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py index 7de4efa80e..8c22262904 100644 --- a/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py +++ b/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py @@ -17,7 +17,7 @@ class PetersburgTimes(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - language = _('Russian') + language = _('English') INDEX = 'http://www.sptimes.ru' def parse_index(self): @@ -36,4 +36,4 @@ class PetersburgTimes(BasicNewsRecipe): 'url':url, 'description':description }) - return [(soup.head.title.string, articles)] \ No newline at end of file + return [(soup.head.title.string, articles)] diff --git a/src/calibre/web/feeds/recipes/recipe_starbulletin.py b/src/calibre/web/feeds/recipes/recipe_starbulletin.py new file mode 100644 index 0000000000..db99ebcec7 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_starbulletin.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +starbulletin.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Starbulletin(BasicNewsRecipe): + title = 'Honolulu Star-Bulletin' + __author__ = 'Darko Miletic' + description = "Latest national and local Hawaii sports news" + publisher = 'Honolulu Star-Bulletin' + category = 'news, Honolulu, Hawaii' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf8' + remove_javascript = True + cover_url = 'http://media.starbulletin.com/designimages/spacer.gif' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher' , publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + keep_only_tags = [ dict(name='div', attrs={'id':'storyColoumn'}) ] + + remove_tags = [ + dict(name=['object','link']) + ,dict(name='span', attrs={'id':'printdesc'}) + ,dict(name='div' , attrs={'class':'lightGreyBox storyTools clearAll'}) + ,dict(name='div' , attrs={'id':'breadcrumbs'}) + ] + + feeds = [ + (u'Headlines', u'http://www.starbulletin.com/starbulletin_headlines.rss' ) + ,(u'News', u'http://www.starbulletin.com/news/index.rss' ) + ,(u'Sports', u'http://www.starbulletin.com/sports/index.rss' ) + ,(u'Features', u'http://www.starbulletin.com/features/index.rss' ) + ,(u'Editorials', u'http://www.starbulletin.com/editorials/index.rss' ) + ,(u'Business', u'http://www.starbulletin.com/business/index.rss' ) + ,(u'Travel', u'http://www.starbulletin.com/travel/index.rss' ) + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + mtag = '\n\n' + soup.head.insert(0,mtag) + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_vijesti.py b/src/calibre/web/feeds/recipes/recipe_vijesti.py index beb6f64f65..98a7736a96 100644 --- a/src/calibre/web/feeds/recipes/recipe_vijesti.py +++ b/src/calibre/web/feeds/recipes/recipe_vijesti.py @@ -1,37 +1,49 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2009, Darko Miletic ' ''' vijesti.cg.yu ''' -import string,re - -from calibre.web.feeds.news import BasicNewsRecipe +import re +from calibre.web.feeds.news import BasicNewsRecipe + class Vijesti(BasicNewsRecipe): title = 'Vijesti' __author__ = 'Darko Miletic' - description = 'News from Montenegro' - oldest_article = 2 + description = 'News from Montenegro' + publisher = 'Daily Press Vijesti' + category = 'news, politics, Montenegro' + oldest_article = 1 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = False + remove_javascript = True encoding = 'cp1250' cover_url = 'http://www.vijesti.cg.yu/img/logo.gif' - + remove_javascript = True + use_embedded_content = False + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Montenegro' - , '--publisher' , 'Daily Press Vijesti' - ] - keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})] + remove_tags = [ + dict(name='div', attrs={'align':'right'}) + ,dict(name=['object','link']) + ] + feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )] def preprocess_html(self, soup): @@ -39,4 +51,10 @@ class Vijesti(BasicNewsRecipe): soup.html['lang'] = 'sr-Latn-ME' mtag = '' soup.head.insert(0,mtag) + for item in soup.findAll('img'): + if item.has_key('align'): + del item['align'] + item.insert(0,'

') return soup + + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_vreme.py b/src/calibre/web/feeds/recipes/recipe_vreme.py index daee3ba3f7..27697acf8e 100644 --- a/src/calibre/web/feeds/recipes/recipe_vreme.py +++ b/src/calibre/web/feeds/recipes/recipe_vreme.py @@ -6,26 +6,34 @@ __copyright__ = '2008, Darko Miletic ' vreme.com ''' -import string,re +import re from calibre import strftime -from calibre.web.feeds.recipes import BasicNewsRecipe -class Vreme(BasicNewsRecipe): - - title = 'Vreme' - __author__ = 'Darko Miletic' - description = 'Politicki Nedeljnik Srbije' +from calibre.web.feeds.news import BasicNewsRecipe + +class Vreme(BasicNewsRecipe): + title = 'Vreme' + __author__ = 'Darko Miletic' + description = 'Politicki Nedeljnik Srbije' + publisher = 'Vreme d.o.o.' + category = 'news, politics, Serbia' no_stylesheets = True + remove_javascript = True needs_subscription = True INDEX = 'http://www.vreme.com' LOGIN = 'http://www.vreme.com/account/index.php' + remove_javascript = True + use_embedded_content = False + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + html2lrf_options = [ - '--comment', description - , '--base-font-size', '10' - , '--category', 'news, politics, Serbia' - , '--publisher', 'Vreme d.o.o.' + '--comment', description + , '--category', category + , '--publisher', publisher ] - + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] def get_browser(self): @@ -66,10 +74,29 @@ class Vreme(BasicNewsRecipe): ,'description':description }) return [(soup.head.title.string, articles)] + + remove_tags = [ + dict(name=['object','link']) + ,dict(name='table',attrs={'xclass':'image'}) + ] def print_version(self, url): return url + '&print=yes' + def preprocess_html(self, soup): + del soup.body['text' ] + del soup.body['bgcolor'] + del soup.body['onload' ] + mtag = '' + soup.head.insert(0,mtag) + tbl = soup.body.table + tbbb = soup.find('td') + if tbbb: + tbbb.extract() + tbl.extract() + soup.body.insert(0,tbbb) + return soup + def get_cover_url(self): cover_url = None soup = self.index_to_soup(self.INDEX) @@ -77,3 +104,5 @@ class Vreme(BasicNewsRecipe): if cover_item: cover_url = self.INDEX + cover_item['src'] return cover_url + + language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 0d073ecce7..f846c7f2e5 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -410,6 +410,7 @@ class RecursiveFetcher(object, LoggingInterface): _fname.decode('latin1', 'replace') _fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '') _fname = sanitize_file_name(_fname) + _fname = os.path.splitext(_fname)[0]+'.xhtml' res = os.path.join(linkdiskpath, _fname) self.downloaded_paths.append(res) self.filemap[nurl] = res diff --git a/upload.py b/upload.py index fb88efc149..66a4a62e30 100644 --- a/upload.py +++ b/upload.py @@ -77,7 +77,7 @@ def run_windows_install_jammer(installer): def build_windows(shutdown=True): installer = installer_name('exe') - vm = '/vmware/Windows XP/Windows XP Professional.vmx' + vm = '/mnt/backup/calibre_windows_xp_home/calibre_windows_xp_home.vmx' start_vm(vm, 'windows', BUILD_SCRIPT%('python setup.py develop', 'python','installer\\\\windows\\\\freeze.py')) if os.path.exists('build/py2exe'): shutil.rmtree('build/py2exe')