diff --git a/.pydevproject b/.pydevproject index b6d22db5e1..aaa4cc3986 100644 --- a/.pydevproject +++ b/.pydevproject @@ -2,7 +2,7 @@ -python 2.6 +python 2.5 /calibre/src diff --git a/installer/linux/freeze.py b/installer/linux/freeze.py index c381041675..97bf8061e3 100644 --- a/installer/linux/freeze.py +++ b/installer/linux/freeze.py @@ -38,6 +38,10 @@ def freeze(): '/usr/lib/libxml2.so.2', '/usr/lib/libxslt.so.1', '/usr/lib/libxslt.so.1', + '/usr/lib/libgthread-2.0.so.0', + '/usr/lib/libglib-2.0.so.0', + '/usr/lib/gcc/i686-pc-linux-gnu/4.3.3/libstdc++.so.6', + '/usr/lib/libpng12.so.0', '/usr/lib/libexslt.so.0', '/usr/lib/libMagickWand.so', '/usr/lib/libMagickCore.so', @@ -81,7 +85,8 @@ def freeze(): 'PyQt4.QtScript.so', 'PyQt4.QtSql.so', 'PyQt4.QtTest.so', 'qt', 'glib', 'gobject'] - packages = ['calibre', 'encodings', 'cherrypy', 'cssutils', 'xdg'] + packages = ['calibre', 'encodings', 'cherrypy', 'cssutils', 'xdg', + 'dateutil'] includes += ['calibre.web.feeds.recipes.'+r for r in recipe_modules] diff --git a/installer/osx/freeze.py b/installer/osx/freeze.py index 3ec24d3aba..dbaad72748 100644 --- a/installer/osx/freeze.py +++ b/installer/osx/freeze.py @@ -342,6 +342,7 @@ def main(): 'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*', 'keyword', 'codeop', 'pydoc', 'readline', 'BeautifulSoup', 'calibre.ebooks.lrf.fonts.prs500.*', + 'dateutil', ], 'packages' : ['PIL', 'Authorization', 'lxml'], 'excludes' : ['IPython'], diff --git a/installer/windows/freeze.py b/installer/windows/freeze.py index ab58fb669d..56486f6bd5 100644 --- a/installer/windows/freeze.py +++ b/installer/windows/freeze.py @@ -179,7 +179,8 @@ def main(args=sys.argv): 'calibre.ebooks.lrf.fonts.prs500.*', 'PyQt4.QtWebKit', 'PyQt4.QtNetwork', ], - 'packages' : ['PIL', 'lxml', 'cherrypy'], + 'packages' : ['PIL', 'lxml', 'cherrypy', + 'dateutil'], 'excludes' : ["Tkconstants", "Tkinter", "tcl", "_imagingtk", "ImageTk", "FixTk" ], diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 360947de2a..76ae742de9 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -69,7 +69,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False): one = re.sub(r'^\.+$', '_', one) if as_unicode: one = one.decode(filesystem_encoding) - return one + return one.replace('..', '_') class CommandLineError(Exception): @@ -382,8 +382,10 @@ def walk(dir): for f in record[-1]: yield os.path.join(record[0], f) -def strftime(fmt, t=time.localtime()): +def strftime(fmt, t=None): ''' A version of strtime that returns unicode strings. ''' + if t is None: + t = time.localtime() if iswindows: if isinstance(fmt, unicode): fmt = fmt.encode('mbcs') diff --git a/src/calibre/constants.py b/src/calibre/constants.py index cbb7fba14e..d1b8ff94cb 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.4.135' +__version__ = '0.4.137' __author__ = "Kovid Goyal " ''' Various run time constants. diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 625a4035d0..a40878480f 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -132,7 +132,7 @@ class HTMLMetadataReader(MetadataReaderPlugin): class MOBIMetadataReader(MetadataReaderPlugin): name = 'Read MOBI metadata' - file_types = set(['mobi', 'prc']) + file_types = set(['mobi', 'prc', '.azw']) description = _('Read metadata from %s files')%'MOBI' def get_metadata(self, stream, ftype): diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py index f573fb1b75..f092473675 100644 --- a/src/calibre/devices/cybookg3/driver.py +++ b/src/calibre/devices/cybookg3/driver.py @@ -17,8 +17,8 @@ class CYBOOKG3(USBMS): # Be sure these have an entry in calibre.devices.mime FORMATS = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt'] - VENDOR_ID = 0x0bda - PRODUCT_ID = 0x0703 + VENDOR_ID = [0x0bda, 0x3034] + PRODUCT_ID = [0x0703, 0x1795] BCD = [0x110, 0x132] VENDOR_NAME = 'BOOKEEN' diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 8ef1ba6b9b..0da1f55c5e 100755 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -12,8 +12,8 @@ class KINDLE(USBMS): # Ordered list of supported formats FORMATS = ['azw', 'mobi', 'prc', 'txt'] - VENDOR_ID = 0x1949 - PRODUCT_ID = 0x0001 + VENDOR_ID = [0x1949] + PRODUCT_ID = [0x0001] BCD = [0x399] VENDOR_NAME = 'KINDLE' diff --git a/src/calibre/devices/prs505/books.py b/src/calibre/devices/prs505/books.py index b63b089fdd..06d205fb02 100644 --- a/src/calibre/devices/prs505/books.py +++ b/src/calibre/devices/prs505/books.py @@ -186,7 +186,10 @@ class BookList(_BookList): node = self.document.createElement(self.prefix + "text") mime = MIME_MAP[name.rpartition('.')[-1].lower()] cid = self.max_id()+1 - sourceid = str(self[0].sourceid) if len(self) else "1" + try: + sourceid = str(self[0].sourceid) if len(self) else '1' + except: + sourceid = '1' attrs = { "title" : info["title"], 'titleSorter' : sortable_title(info['title']), diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index 9308af2c5a..f4256c4c14 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -32,7 +32,7 @@ class PRS505(Device): BCD = [0x229] #: Needed to disambiguate 505 and 700 on linux PRODUCT_NAME = 'PRS-505' VENDOR_NAME = 'SONY' - FORMATS = ['lrf', 'epub', 'lrx', 'rtf', 'pdf', 'txt'] + FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt'] MEDIA_XML = 'database/cache/media.xml' CACHE_XML = 'Sony Reader/database/cache.xml' @@ -248,15 +248,20 @@ class PRS505(Device): time.sleep(3) self.open_osx() if self._card_prefix is not None: - cachep = os.path.join(self._card_prefix, self.CACHE_XML) - if not os.path.exists(cachep): - os.makedirs(os.path.dirname(cachep), mode=0777) - f = open(cachep, 'wb') - f.write(u''' + try: + cachep = os.path.join(self._card_prefix, self.CACHE_XML) + if not os.path.exists(cachep): + os.makedirs(os.path.dirname(cachep), mode=0777) + f = open(cachep, 'wb') + f.write(u''' '''.encode('utf8')) - f.close() + f.close() + except: + self._card_prefix = None + import traceback + traceback.print_exc() def set_progress_reporter(self, pr): self.report_progress = pr diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 761fe9ba74..5943e2e13f 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -74,24 +74,27 @@ class Device(_Device): def get_fdi(cls): fdi = '' - fdi_base_values = dict( - app=__appname__, - deviceclass=cls.__name__, - vendor_id=hex(cls.VENDOR_ID), - product_id=hex(cls.PRODUCT_ID), - main_memory=cls.MAIN_MEMORY_VOLUME_LABEL, - storage_card=cls.STORAGE_CARD_VOLUME_LABEL, - ) - if cls.BCD is None: - fdi_base_values['BCD_start'] = '' - fdi_base_values['BCD_end'] = '' - fdi = cls.FDI_TEMPLATE % fdi_base_values - else: - for bcd in cls.BCD: - fdi_bcd_values = fdi_base_values - fdi_bcd_values['BCD_start'] = cls.FDI_BCD_TEMPLATE % dict(bcd=hex(bcd)) - fdi_bcd_values['BCD_end'] = '' - fdi += cls.FDI_TEMPLATE % fdi_bcd_values + for vid in cls.VENDOR_ID: + for pid in cls.PRODUCT_ID: + fdi_base_values = dict( + app=__appname__, + deviceclass=cls.__name__, + vendor_id=hex(vid), + product_id=hex(pid), + main_memory=cls.MAIN_MEMORY_VOLUME_LABEL, + storage_card=cls.STORAGE_CARD_VOLUME_LABEL, + ) + + if cls.BCD is None: + fdi_base_values['BCD_start'] = '' + fdi_base_values['BCD_end'] = '' + fdi += cls.FDI_TEMPLATE % fdi_base_values + else: + for bcd in cls.BCD: + fdi_bcd_values = fdi_base_values + fdi_bcd_values['BCD_start'] = cls.FDI_BCD_TEMPLATE % dict(bcd=hex(bcd)) + fdi_bcd_values['BCD_end'] = '' + fdi += cls.FDI_TEMPLATE % fdi_bcd_values return fdi diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py index e81821ed53..9a8e251108 100644 --- a/src/calibre/ebooks/epub/from_any.py +++ b/src/calibre/ebooks/epub/from_any.py @@ -124,6 +124,7 @@ MAP = { 'lit' : lit2opf, 'mobi' : mobi2opf, 'prc' : mobi2opf, + 'azw' : mobi2opf, 'fb2' : fb22opf, 'rtf' : rtf2opf, 'txt' : txt2opf, @@ -131,7 +132,8 @@ MAP = { 'epub' : epub2opf, 'odt' : odt2epub, } -SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'fb2', 'odt', 'rtf', 'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub'] +SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf', + 'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub'] def unarchive(path, tdir): extract(path, tdir) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index bd9b59cfbd..fd94c9ee69 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -74,7 +74,9 @@ def check_links(opf_path, pretty_print): html_files = [] for item in opf.itermanifest(): if 'html' in item.get('media-type', '').lower(): - f = item.get('href').split('/')[-1].decode('utf-8') + f = item.get('href').split('/')[-1] + if isinstance(f, str): + f = f.decode('utf-8') html_files.append(os.path.abspath(content(f))) for path in html_files: diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index e264fec7cb..b468e80246 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -330,7 +330,10 @@ class PreProcessor(object): sanitize_head), # Convert all entities, since lxml doesn't handle them well (re.compile(r'&(\S+?);'), convert_entities), - + # Remove the ]*>'), lambda match: ''), + # Strip all comments since Adobe DE is petrified of them + (re.compile(r'.*', re.DOTALL), + lambda match: ''), + ] + + feeds = [ + (u'Main - Font Page', u'http://www.hindu.com/rss/01hdline.xml'), + (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'), + (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'), + (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'), + (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'), + (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'), + (u'Main - Weather / Religion / Crossword / Cartoon', + u'http://www.hindu.com/rss/10hdline.xml'), + (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'), + (u'Supplement - Literary Review', + u'http://www.hindu.com/rss/lrhdline.xml'), + (u'Supplement - Sunday Magazine', + u'http://www.hindu.com/rss/maghdline.xml'), + (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'), + (u'Supplement - Business Review', + u'http://www.hindu.com/rss/bizhdline.xml'), + (u'Supplement - Book Review', + u'http://www.hindu.com/rss/brhdline.xml'), + (u'Supplement - Science & Technology', + u'http://www.hindu.com/rss/setahdline.xml') + ] + + def postprocess_html(self, soup, first_fetch): + for t in soup.findAll(['table', 'tr', 'td']): + t.name = 'div' + return soup \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_infobae.py b/src/calibre/web/feeds/recipes/recipe_infobae.py index 40e720f94c..13c52ca6b1 100644 --- a/src/calibre/web/feeds/recipes/recipe_infobae.py +++ b/src/calibre/web/feeds/recipes/recipe_infobae.py @@ -6,29 +6,36 @@ __copyright__ = '2008-2009, Darko Miletic ' infobae.com ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class Infobae(BasicNewsRecipe): title = 'Infobae.com' __author__ = 'Darko Miletic' description = 'Informacion Libre las 24 horas' publisher = 'Infobae.com' category = 'news, politics, Argentina' - oldest_article = 2 + oldest_article = 1 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + language = _('Spanish') encoding = 'iso-8859-1' cover_url = 'http://www.infobae.com/imgs/header/header.gif' remove_javascript = True html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + + remove_tags = [ + dict(name=['embed','link','object']) + ,dict(name='a', attrs={'onclick':'javascript:window.print()'}) + ] feeds = [ (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) @@ -48,5 +55,3 @@ class Infobae(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_jutarnji.py b/src/calibre/web/feeds/recipes/recipe_jutarnji.py index 03c22c9b99..e8826bc4e1 100644 --- a/src/calibre/web/feeds/recipes/recipe_jutarnji.py +++ b/src/calibre/web/feeds/recipes/recipe_jutarnji.py @@ -1,47 +1,46 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' jutarnji.hr ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Jutarnji(BasicNewsRecipe): title = u'Jutarnji' __author__ = u'Darko Miletic' description = u'Hrvatski portal' publisher = 'Jutarnji.hr' category = 'news, politics, Croatia' - oldest_article = 2 + oldest_article = 1 max_articles_per_feed = 100 - simultaneous_downloads = 1 - delay = 1 - language = _('Croatian') + simultaneous_downloads = 2 + delay = 1 + language = _('Croatian') no_stylesheets = True use_embedded_content = False remove_javascript = True encoding = 'cp1250' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ - dict(name='embed') + dict(name=['embed','hr','link','object']) ,dict(name='a', attrs={'class':'a11'}) - ,dict(name='hr') ] feeds = [ @@ -60,13 +59,11 @@ class Jutarnji(BasicNewsRecipe): return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest def preprocess_html(self, soup): - mtag = '' + mtag = '\n' soup.head.insert(0,mtag) - mtag = '' - soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] for item in soup.findAll(width=True): del item['width'] return soup - + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py b/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py index eea510a7cd..bb8e645fbe 100644 --- a/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py +++ b/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py @@ -1,14 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' juventudrebelde.cu ''' -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + class Juventudrebelde(BasicNewsRecipe): title = 'Juventud Rebelde' __author__ = 'Darko Miletic' @@ -20,17 +20,18 @@ class Juventudrebelde(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False encoding = 'cp1252' + language = _('Spanish') cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg') remove_javascript = True html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' keep_only_tags = [dict(name='div', attrs={'id':'noticia'})] @@ -50,5 +51,4 @@ class Juventudrebelde(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Spanish') \ No newline at end of file + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py new file mode 100644 index 0000000000..04db6b02d5 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py @@ -0,0 +1,80 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch Linuxdevices. +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Sueddeutsche(BasicNewsRecipe): + + title = u'Linuxdevices' + description = 'News about Linux driven Hardware' + __author__ = 'Oliver Niesner' + use_embedded_content = False + timefmt = ' [%a, %d %b %Y]' + language = _('English') + max_articles_per_feed = 50 + no_stylesheets = True + encoding = 'latin1' + + remove_tags_after = [dict(id='nointelliTXT')] + filter_regexps = [r'ad\.doubleclick\.net'] + + + remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), + dict(name='div', attrs={'class':'bannerSky'}), + dict(name='div', attrs={'class':'footerLinks'}), + dict(name='div', attrs={'class':'seitenanfang'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='table', attrs={'class':'pageAktiv'}), + dict(name='table', attrs={'class':'xartable'}), + dict(name='table', attrs={'class':'wpnavi'}), + dict(name='table', attrs={'class':'bgcontent absatz'}), + dict(name='table', attrs={'class':'footer'}), + dict(name='table', attrs={'class':'artikelBox'}), + dict(name='table', attrs={'class':'kommentare'}), + dict(name='table', attrs={'class':'pageBoxBot'}), + #dict(name='table', attrs={'with':'100%'}), + dict(name='td', attrs={'nowrap':'nowrap'}), + dict(name='td', attrs={'valign':'middle'}), + dict(name='td', attrs={'align':'left'}), + dict(name='td', attrs={'align':'center'}), + dict(name='td', attrs={'height':'5'}), + dict(name='div', attrs={'class':'artikelBox navigatorBox'}), + dict(name='div', attrs={'class':'similar-article-box'}), + dict(name='div', attrs={'class':'videoBigHack'}), + dict(name='td', attrs={'class':'artikelDruckenRight'}), + dict(name='td', attrs={'class':'width="200"'}), + dict(name='a', attrs={'href':'/news'}), + dict(name='a', attrs={'href':'/'}), + dict(name='a', attrs={'href':'/articles'}), + dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), + dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), + dict(name='iframe'), + dict(name='form'), + #dict(name='tr', attrs={'td':'Click here to learn'}), + dict(name='span', attrs={'class':'hidePrint'}), + dict(id='headerLBox'), + dict(id='nointelliTXT'), + dict(id='rechteSpalte'), + dict(id='newsticker-list-small'), + dict(id='ntop5'), + dict(id='ntop5send'), + dict(id='ntop5commented'), + dict(id='nnav-bgheader'), + dict(id='nnav-headerteaser'), + dict(id='nnav-head'), + dict(id='nnav-top'), + dict(id='nnav-logodiv'), + dict(id='nnav-logo'), + dict(id='nnav-oly'), + dict(id='readcomment')] + + + + feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] + diff --git a/src/calibre/web/feeds/recipes/recipe_nin.py b/src/calibre/web/feeds/recipes/recipe_nin.py index d180f2b221..85019b07ea 100644 --- a/src/calibre/web/feeds/recipes/recipe_nin.py +++ b/src/calibre/web/feeds/recipes/recipe_nin.py @@ -1,15 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' nin.co.yu ''' import re, urllib +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Nin(BasicNewsRecipe): title = 'NIN online' __author__ = 'Darko Miletic' @@ -27,15 +26,17 @@ class Nin(BasicNewsRecipe): LOGIN = PREFIX + '/?logout=true' remove_javascript = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' - + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -69,5 +70,3 @@ class Nin(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_novosti.py b/src/calibre/web/feeds/recipes/recipe_novosti.py index 136302c573..0190307542 100644 --- a/src/calibre/web/feeds/recipes/recipe_novosti.py +++ b/src/calibre/web/feeds/recipes/recipe_novosti.py @@ -1,15 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' novosti.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Novosti(BasicNewsRecipe): title = u'Vecernje Novosti' __author__ = u'Darko Miletic' @@ -22,15 +21,17 @@ class Novosti(BasicNewsRecipe): use_embedded_content = False encoding = 'utf8' remove_javascript = True - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -40,10 +41,8 @@ class Novosti(BasicNewsRecipe): feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] def preprocess_html(self, soup): - mtag = '' + mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nspm.py b/src/calibre/web/feeds/recipes/recipe_nspm.py index 4cc6d50ca0..0ff80b8a93 100644 --- a/src/calibre/web/feeds/recipes/recipe_nspm.py +++ b/src/calibre/web/feeds/recipes/recipe_nspm.py @@ -1,41 +1,44 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' nspm.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Nspm(BasicNewsRecipe): title = u'Nova srpska politicka misao' __author__ = 'Darko Miletic' description = 'Casopis za politicku teoriju i drustvena istrazivanja' publisher = 'NSPM' category = 'news, politics, Serbia' - oldest_article = 7 + oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False INDEX = 'http://www.nspm.rs/?alphabet=l' encoding = 'utf8' remove_javascript = True - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - remove_tags = [dict(name='a')] + remove_tags = [ + dict(name=['a','img','link','object','embed']) + ,dict(name='td', attrs={'class':'buttonheading'}) + ] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -48,13 +51,12 @@ class Nspm(BasicNewsRecipe): return url.replace('.html','/stampa.html') def preprocess_html(self, soup): - soup.html['xml:lang'] = 'sr-Latn-RS' - soup.html['lang'] = 'sr-Latn-RS' + lng = 'sr-Latn-RS' + soup.html['xml:lang'] = lng + soup.html['lang'] = lng ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'}) if ftag: - ftag['content'] = 'sr-Latn-RS' + ftag['content'] = lng for item in soup.findAll(style=True): - del item['style'] + del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_pescanik.py b/src/calibre/web/feeds/recipes/recipe_pescanik.py index e3385e02aa..278ed38183 100644 --- a/src/calibre/web/feeds/recipes/recipe_pescanik.py +++ b/src/calibre/web/feeds/recipes/recipe_pescanik.py @@ -1,45 +1,46 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' pescanik.net ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Pescanik(BasicNewsRecipe): title = 'Pescanik' __author__ = 'Darko Miletic' description = 'Pescanik' publisher = 'Pescanik' category = 'news, politics, Serbia' - oldest_article = 7 + oldest_article = 5 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False remove_javascript = True encoding = 'utf8' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png" + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png" preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ dict(name='td' , attrs={'class':'buttonheading'}) ,dict(name='span', attrs={'class':'article_seperator'}) - ,dict(name=['object','link']) + ,dict(name=['object','link','img','h4','ul']) ] feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')] @@ -54,5 +55,3 @@ class Pescanik(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_pobjeda.py b/src/calibre/web/feeds/recipes/recipe_pobjeda.py new file mode 100644 index 0000000000..9a4dbb0eee --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_pobjeda.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +pobjeda.co.me +''' + +import re +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class Pobjeda(BasicNewsRecipe): + title = 'Pobjeda Online' + __author__ = 'Darko Miletic' + description = 'News from Montenegro' + publisher = 'Pobjeda a.d.' + category = 'news, politics, Montenegro' + language = _('Serbian') + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + remove_javascript = True + encoding = 'utf8' + remove_javascript = True + use_embedded_content = False + INDEX = u'http://www.pobjeda.co.me' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + keep_only_tags = [dict(name='div', attrs={'class':'vijest'})] + + remove_tags = [dict(name=['object','link'])] + + feeds = [ + (u'Politika' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=1' ) + ,(u'Ekonomija' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=2' ) + ,(u'Drustvo' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=3' ) + ,(u'Crna Hronika' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=4' ) + ,(u'Kultura' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=5' ) + ,(u'Hronika Podgorice' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=7' ) + ,(u'Feljton' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=8' ) + ,(u'Crna Gora' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=9' ) + ,(u'Svijet' , u'http://www.pobjeda.co.me/rubrika.php?rubrika=202') + ,(u'Ekonomija i Biznis', u'http://www.pobjeda.co.me/dodatak.php?rubrika=11' ) + ,(u'Djeciji Svijet' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=12' ) + ,(u'Kultura i Drustvo' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=13' ) + ,(u'Agora' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=133') + ,(u'Ekologija' , u'http://www.pobjeda.co.me/dodatak.php?rubrika=252') + ] + + def preprocess_html(self, soup): + soup.html['xml:lang'] = 'sr-Latn-ME' + soup.html['lang'] = 'sr-Latn-ME' + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup(self.INDEX) + cover_item = soup.find('img',attrs={'alt':'Naslovna strana'}) + if cover_item: + cover_url = self.INDEX + cover_item.parent['href'] + return cover_url + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + for item in soup.findAll('div', attrs={'class':'vijest'}): + description = self.tag_to_string(item.h2) + atag = item.h1.find('a') + if atag: + url = self.INDEX + '/' + atag['href'] + title = self.tag_to_string(atag) + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds + diff --git a/src/calibre/web/feeds/recipes/recipe_politika.py b/src/calibre/web/feeds/recipes/recipe_politika.py index 1575d8984f..93c8f43b36 100644 --- a/src/calibre/web/feeds/recipes/recipe_politika.py +++ b/src/calibre/web/feeds/recipes/recipe_politika.py @@ -6,9 +6,8 @@ __copyright__ = '2008, Darko Miletic ' politika.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Politika(BasicNewsRecipe): title = u'Politika Online' __author__ = 'Darko Miletic' @@ -21,10 +20,11 @@ class Politika(BasicNewsRecipe): use_embedded_content = False remove_javascript = True encoding = 'utf8' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description + '--comment', description , '--category', category , '--publisher', publisher ] @@ -60,6 +60,6 @@ class Politika(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] ftag = soup.find('div',attrs={'class':'content_center_border'}) - if ftag: - ftag['align'] = 'left' + if ftag.has_key('align'): + del ftag['align'] return soup diff --git a/src/calibre/web/feeds/recipes/recipe_vijesti.py b/src/calibre/web/feeds/recipes/recipe_vijesti.py index 98a7736a96..9923193d7b 100644 --- a/src/calibre/web/feeds/recipes/recipe_vijesti.py +++ b/src/calibre/web/feeds/recipes/recipe_vijesti.py @@ -4,13 +4,12 @@ __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic ' ''' -vijesti.cg.yu +vijesti.me ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Vijesti(BasicNewsRecipe): title = 'Vijesti' __author__ = 'Darko Miletic' @@ -22,13 +21,14 @@ class Vijesti(BasicNewsRecipe): no_stylesheets = True remove_javascript = True encoding = 'cp1250' - cover_url = 'http://www.vijesti.cg.yu/img/logo.gif' + cover_url = 'http://www.vijesti.me/img/logo.gif' remove_javascript = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description + '--comment', description , '--category', category , '--publisher', publisher ] @@ -39,12 +39,9 @@ class Vijesti(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})] - remove_tags = [ - dict(name='div', attrs={'align':'right'}) - ,dict(name=['object','link']) - ] + remove_tags = [dict(name=['object','link','embed'])] - feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )] + feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )] def preprocess_html(self, soup): soup.html['xml:lang'] = 'sr-Latn-ME' @@ -56,5 +53,3 @@ class Vijesti(BasicNewsRecipe): del item['align'] item.insert(0,'

') return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_vreme.py b/src/calibre/web/feeds/recipes/recipe_vreme.py index 27697acf8e..c78e956d29 100644 --- a/src/calibre/web/feeds/recipes/recipe_vreme.py +++ b/src/calibre/web/feeds/recipes/recipe_vreme.py @@ -1,16 +1,15 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' vreme.com ''' import re from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Vreme(BasicNewsRecipe): title = 'Vreme' __author__ = 'Darko Miletic' @@ -24,15 +23,17 @@ class Vreme(BasicNewsRecipe): LOGIN = 'http://www.vreme.com/account/index.php' remove_javascript = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -87,14 +88,19 @@ class Vreme(BasicNewsRecipe): del soup.body['text' ] del soup.body['bgcolor'] del soup.body['onload' ] - mtag = '' + for item in soup.findAll('table'): + if item.has_key('width'): + del item['width'] + if item.has_key('height'): + del item['height'] + mtag = '' soup.head.insert(0,mtag) tbl = soup.body.table tbbb = soup.find('td') if tbbb: tbbb.extract() tbl.extract() - soup.body.insert(0,tbbb) + soup.body.insert(0,tbbb) return soup def get_cover_url(self): @@ -104,5 +110,3 @@ class Vreme(BasicNewsRecipe): if cover_item: cover_url = self.INDEX + cover_item['src'] return cover_url - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index f846c7f2e5..4da3f4019c 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -8,7 +8,7 @@ Fetch a webpage and its links recursively. The webpages are saved to disk in UTF-8 encoding with any charset declarations removed. ''' import sys, socket, os, urlparse, logging, re, time, copy, urllib2, threading, traceback -from urllib import url2pathname +from urllib import url2pathname, quote from threading import RLock from httplib import responses from PIL import Image @@ -179,6 +179,8 @@ class RecursiveFetcher(object, LoggingInterface): delta = time.time() - self.last_fetch_at if delta < self.delay: time.sleep(delta) + if re.search(r'\s+', url) is not None: + url = quote(url) with self.browser_lock: try: with closing(self.browser.open(url)) as f: diff --git a/src/cherrypy/_cpchecker.py b/src/cherrypy/_cpchecker.py index 0d98e84918..445a1f3201 100644 --- a/src/cherrypy/_cpchecker.py +++ b/src/cherrypy/_cpchecker.py @@ -58,7 +58,7 @@ class Checker(object): "specific sections. You must explicitly pass " "application config via " "cherrypy.tree.mount(..., config=app_config)") - warnings.warn(msg) + warnings.warn(msg[:5]) return def check_static_paths(self):