diff --git a/installer/windows/freeze.py b/installer/windows/freeze.py index aca1481fa8..0829907d01 100644 --- a/installer/windows/freeze.py +++ b/installer/windows/freeze.py @@ -14,7 +14,20 @@ IMAGEMAGICK_DIR = 'C:\\ImageMagick' FONTCONFIG_DIR = 'C:\\fontconfig' VC90 = r'C:\VC90.CRT' -import sys, os, py2exe, shutil, zipfile, glob, re +# ModuleFinder can't handle runtime changes to __path__, but win32com uses them +import sys +import py2exe.mf as modulefinder +import win32com +for p in win32com.__path__[1:]: + modulefinder.AddPackagePath("win32com", p) +for extra in ["win32com.shell"]: #,"win32com.mapi" + __import__(extra) + m = sys.modules[extra] + for p in m.__path__[1:]: + modulefinder.AddPackagePath(extra, p) + + +import os, py2exe, shutil, zipfile, glob, re from distutils.core import setup BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) sys.path.insert(0, BASE_DIR) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 04284ab8b3..00276f6970 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.5.5' +__version__ = '0.5.6' __author__ = "Kovid Goyal " ''' Various run time constants. diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index 0ce4629062..3e1ec4c811 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -36,7 +36,7 @@ import os, sys, cStringIO, logging, re, functools, shutil from lxml.etree import XPath from lxml import html, etree -from PyQt4.Qt import QApplication, QPixmap +from PyQt4.Qt import QApplication, QPixmap, Qt from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\ opf_traverse, create_metadata, rebase_toc, Link, parser @@ -50,7 +50,7 @@ from calibre.ebooks.epub.pages import add_page_map from calibre.ebooks.epub.fonts import Rationalizer from calibre.constants import preferred_encoding from calibre.customize.ui import run_plugins_on_postprocess -from calibre import walk, CurrentDir, to_unicode +from calibre import walk, CurrentDir, to_unicode, fit_image content = functools.partial(os.path.join, u'content') @@ -112,6 +112,31 @@ def find_html_index(files): return f, os.path.splitext(f)[1].lower()[1:] return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:] +def rescale_images(imgdir, screen_size, log): + pwidth, pheight = screen_size + if QApplication.instance() is None: + QApplication([]) + for f in os.listdir(imgdir): + path = os.path.join(imgdir, f) + if os.path.splitext(f)[1] in ('.css', '.js'): + continue + + p = QPixmap() + p.load(path) + if p.isNull(): + continue + width, height = p.width(), p.height() + scaled, new_width, new_height = fit_image(width, height, pwidth, + pheight) + if scaled: + log.info('Rescaling image: '+f) + p.scaled(new_width, new_height, Qt.IgnoreAspectRatio, + Qt.SmoothTransformation).save(path, 'JPEG') + + + + + class HTMLProcessor(Processor, Rationalizer): def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets): @@ -482,6 +507,10 @@ def convert(htmlfile, opts, notification=None, create_epub=True, if os.stat(ncx_path).st_size > opts.profile.flow_size: logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size) + if opts.profile.screen_size is not None: + rescale_images(os.path.join(tdir, 'content', 'resources'), + opts.profile.screen_size, logger) + if create_epub: epub = initialize_container(opts.output) epub.add_dir(tdir) diff --git a/src/calibre/gui2/lrf_renderer/config.ui b/src/calibre/gui2/lrf_renderer/config.ui index 47956fe003..cad538090a 100644 --- a/src/calibre/gui2/lrf_renderer/config.ui +++ b/src/calibre/gui2/lrf_renderer/config.ui @@ -1,72 +1,73 @@ - + + ViewerConfig - - + + 0 0 - 281 - 214 + 373 + 264 - + Configure Viewer - - + + :/images/config.svg:/images/config.svg - - - - + + + + Use white background - - - + + + Hyphenate - + true - - - - Qt::Horizontal - - - QDialogButtonBox::Cancel|QDialogButtonBox::Ok - - - - - - + + + QFrame::Box - + <b>Changes will only take effect after a restart.</b> - + Qt::RichText - + Qt::AlignCenter - + true + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + - + @@ -75,11 +76,11 @@ ViewerConfig accept() - + 248 254 - + 157 274 @@ -91,11 +92,11 @@ ViewerConfig reject() - + 316 260 - + 286 274 diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index fd80a722e5..b02b060902 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -31,6 +31,21 @@ from calibre.customize.ui import run_plugins_on_import from calibre import sanitize_file_name from calibre.ebooks import BOOK_EXTENSIONS +if iswindows: + import calibre.utils.winshell as winshell + +def delete_file(path): + try: + winshell.delete_file(path, silent=True, no_confirm=True) + except: + os.remove(path) + +def delete_tree(path): + try: + winshell.delete_file(path, silent=True, no_confirm=True) + except: + shutil.rmtree(path) + copyfile = os.link if hasattr(os, 'link') else shutil.copyfile FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, @@ -499,7 +514,7 @@ class LibraryDatabase2(LibraryDatabase): def rmtree(self, path): if not self.normpath(self.library_path).startswith(self.normpath(path)): - shutil.rmtree(path) + delete_tree(path) def normpath(self, path): path = os.path.abspath(os.path.realpath(path)) @@ -745,7 +760,10 @@ class LibraryDatabase2(LibraryDatabase): path = os.path.join(self.library_path, self.path(id, index_is_id=True)) self.data.remove(id) if os.path.exists(path): - self.rmtree(path) + if iswindows: + winshell.delete_file(path, no_confirm=True, silent=True) + else: + self.rmtree(path) parent = os.path.dirname(path) if len(os.listdir(parent)) == 0: self.rmtree(parent) @@ -764,7 +782,7 @@ class LibraryDatabase2(LibraryDatabase): ext = ('.' + format.lower()) if format else '' path = os.path.join(path, name+ext) try: - os.remove(path) + delete_file(path) except: traceback.print_exc() self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper())) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 7ca4b1b876..265fbf3ad0 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -105,7 +105,7 @@ Device Integration What devices does |app| support? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1 and 2 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. +At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1/2, Netronix EB600 and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/utils/winshell.py b/src/calibre/utils/winshell.py new file mode 100644 index 0000000000..9769f52e4a --- /dev/null +++ b/src/calibre/utils/winshell.py @@ -0,0 +1,400 @@ +"""winshell - convenience functions to access Windows shell functionality + +Certain aspects of the Windows user interface are grouped by + Microsoft as Shell functions. These include the Desktop, shortcut + icons, special folders (such as My Documents) and a few other things. + +These are mostly available via the shell module of the win32all + extensions, but whenever I need to use them, I've forgotten the + various constants and so on. + +Several of the shell items have two variants: personal and common, + or User and All Users. These refer to systems with profiles in use: + anything from NT upwards, and 9x with Profiles turned on. Where + relevant, the Personal/User version refers to that owned by the + logged-on user and visible only to that user; the Common/All Users + version refers to that maintained by an Administrator and visible + to all users of the system. + +(c) Tim Golden 25th November 2003 +Licensed under the (GPL-compatible) MIT License: +http://www.opensource.org/licenses/mit-license.php + +9th Nov 2005 0.2 . License changed to MIT + . Added functionality using SHFileOperation +25th Nov 2003 0.1 . Initial release by Tim Golden +""" + +__VERSION__ = "0.2" + +import os +from win32com import storagecon +from win32com.shell import shell, shellcon +import pythoncom + +class x_winshell (Exception): + pass + +# +# Although this can be done in one call, Win9x didn't +# support it, so I added this workaround. +# +def get_path (folder_id): + return shell.SHGetPathFromIDList (shell.SHGetSpecialFolderLocation (0, folder_id)) + +def desktop (common=0): + "What folder is equivalent to the current desktop?" + return get_path ((shellcon.CSIDL_DESKTOP, shellcon.CSIDL_COMMON_DESKTOPDIRECTORY)[common]) + +def common_desktop (): +# +# Only here because already used in code +# + return desktop (common=1) + +def application_data (common=0): + "What folder holds application configuration files?" + return get_path ((shellcon.CSIDL_APPDATA, shellcon.CSIDL_COMMON_APPDATA)[common]) + +def favourites (common=0): + "What folder holds the Explorer favourites shortcuts?" + return get_path ((shellcon.CSIDL_FAVORITES, shellcon.CSIDL_COMMON_FAVORITES)[common]) +bookmarks = favourites + +def start_menu (common=0): + "What folder holds the Start Menu shortcuts?" + return get_path ((shellcon.CSIDL_STARTMENU, shellcon.CSIDL_COMMON_STARTMENU)[common]) + +def programs (common=0): + "What folder holds the Programs shortcuts (from the Start Menu)?" + return get_path ((shellcon.CSIDL_PROGRAMS, shellcon.CSIDL_COMMON_PROGRAMS)[common]) + +def startup (common=0): + "What folder holds the Startup shortcuts (from the Start Menu)?" + return get_path ((shellcon.CSIDL_STARTUP, shellcon.CSIDL_COMMON_STARTUP)[common]) + +def personal_folder (): + "What folder holds the My Documents files?" + return get_path (shellcon.CSIDL_PERSONAL) +my_documents = personal_folder + +def recent (): + "What folder holds the Documents shortcuts (from the Start Menu)?" + return get_path (shellcon.CSIDL_RECENT) + +def sendto (): + "What folder holds the SendTo shortcuts (from the Context Menu)?" + return get_path (shellcon.CSIDL_SENDTO) + +# +# Internally abstracted function to handle one +# of several shell-based file manipulation +# routines. Not all the possible parameters +# are covered which might be passed to the +# underlying SHFileOperation API call, but +# only those which seemed useful to me at +# the time. +# +def _file_operation ( + operation, + source_path, + target_path=None, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + # + # At present the Python wrapper around SHFileOperation doesn't + # allow lists of files. Hopefully it will at some point, so + # take account of it here. + # If you pass this shell function a "/"-separated path with + # a wildcard, eg c:/temp/*.tmp, it gets confused. It's ok + # with a backslash, so convert here. + # + source_path = source_path or "" + if isinstance (source_path, basestring): + source_path = os.path.abspath (source_path) + else: + source_path = [os.path.abspath (i) for i in source_path] + + target_path = target_path or "" + if isinstance (target_path, basestring): + target_path = os.path.abspath (target_path) + else: + target_path = [os.path.abspath (i) for i in target_path] + + flags = 0 + if allow_undo: flags |= shellcon.FOF_ALLOWUNDO + if no_confirm: flags |= shellcon.FOF_NOCONFIRMATION + if rename_on_collision: flags |= shellcon.FOF_RENAMEONCOLLISION + if silent: flags |= shellcon.FOF_SILENT + + result, n_aborted = shell.SHFileOperation ( + (hWnd or 0, operation, source_path, target_path, flags, None, None) + ) + if result <> 0: + raise x_winshell, result + elif n_aborted: + raise x_winshell, "%d operations were aborted by the user" % n_aborted + +def copy_file ( + source_path, + target_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file copy. Copying in + this way allows the possibility of undo, auto-renaming, + and showing the "flying file" animation during the copy. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_COPY, + source_path, + target_path, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def move_file ( + source_path, + target_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file move. Moving in + this way allows the possibility of undo, auto-renaming, + and showing the "flying file" animation during the copy. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_MOVE, + source_path, + target_path, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def rename_file ( + source_path, + target_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file rename. Renaming in + this way allows the possibility of undo, auto-renaming, + and showing the "flying file" animation during the copy. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_RENAME, + source_path, + target_path, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def delete_file ( + source_path, + allow_undo=True, + no_confirm=False, + rename_on_collision=True, + silent=False, + hWnd=None +): + """Perform a shell-based file delete. Deleting in + this way uses the system recycle bin, allows the + possibility of undo, and showing the "flying file" + animation during the delete. + + The default options allow for undo, don't automatically + clobber on a name clash, automatically rename on collision + and display the animation. + """ + _file_operation ( + shellcon.FO_DELETE, + source_path, + None, + allow_undo, + no_confirm, + rename_on_collision, + silent, + hWnd + ) + +def CreateShortcut (Path, Target, Arguments = "", StartIn = "", Icon = ("",0), Description = ""): + """Create a Windows shortcut: + + Path - As what file should the shortcut be created? + Target - What command should the desktop use? + Arguments - What arguments should be supplied to the command? + StartIn - What folder should the command start in? + Icon - (filename, index) What icon should be used for the shortcut? + Description - What description should the shortcut be given? + + eg + CreateShortcut ( + Path=os.path.join (desktop (), "PythonI.lnk"), + Target=r"c:\python\python.exe", + Icon=(r"c:\python\python.exe", 0), + Description="Python Interpreter" + ) + """ + sh = pythoncom.CoCreateInstance ( + shell.CLSID_ShellLink, + None, + pythoncom.CLSCTX_INPROC_SERVER, + shell.IID_IShellLink + ) + + sh.SetPath (Target) + sh.SetDescription (Description) + sh.SetArguments (Arguments) + sh.SetWorkingDirectory (StartIn) + sh.SetIconLocation (Icon[0], Icon[1]) + + persist = sh.QueryInterface (pythoncom.IID_IPersistFile) + persist.Save (Path, 1) + +# +# Constants for structured storage +# +# These come from ObjIdl.h +FMTID_USER_DEFINED_PROPERTIES = "{F29F85E0-4FF9-1068-AB91-08002B27B3D9}" +FMTID_CUSTOM_DEFINED_PROPERTIES = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" + +PIDSI_TITLE = 0x00000002 +PIDSI_SUBJECT = 0x00000003 +PIDSI_AUTHOR = 0x00000004 +PIDSI_CREATE_DTM = 0x0000000c +PIDSI_KEYWORDS = 0x00000005 +PIDSI_COMMENTS = 0x00000006 +PIDSI_TEMPLATE = 0x00000007 +PIDSI_LASTAUTHOR = 0x00000008 +PIDSI_REVNUMBER = 0x00000009 +PIDSI_EDITTIME = 0x0000000a +PIDSI_LASTPRINTED = 0x0000000b +PIDSI_LASTSAVE_DTM = 0x0000000d +PIDSI_PAGECOUNT = 0x0000000e +PIDSI_WORDCOUNT = 0x0000000f +PIDSI_CHARCOUNT = 0x00000010 +PIDSI_THUMBNAIL = 0x00000011 +PIDSI_APPNAME = 0x00000012 +PROPERTIES = ( + PIDSI_TITLE, + PIDSI_SUBJECT, + PIDSI_AUTHOR, + PIDSI_CREATE_DTM, + PIDSI_KEYWORDS, + PIDSI_COMMENTS, + PIDSI_TEMPLATE, + PIDSI_LASTAUTHOR, + PIDSI_EDITTIME, + PIDSI_LASTPRINTED, + PIDSI_LASTSAVE_DTM, + PIDSI_PAGECOUNT, + PIDSI_WORDCOUNT, + PIDSI_CHARCOUNT, + PIDSI_APPNAME +) + +# +# This was taken from someone else's example, +# but I can't find where. If you know, please +# tell me so I can give due credit. +# +def structured_storage (filename): + """Pick out info from MS documents with embedded + structured storage (typically MS Word docs etc.) + + Returns a dictionary of information found + """ + + if not pythoncom.StgIsStorageFile (filename): + return {} + + flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE + storage = pythoncom.StgOpenStorage (filename, None, flags) + try: + properties_storage = storage.QueryInterface (pythoncom.IID_IPropertySetStorage) + except pythoncom.com_error: + return {} + + property_sheet = properties_storage.Open (FMTID_USER_DEFINED_PROPERTIES) + try: + data = property_sheet.ReadMultiple (PROPERTIES) + finally: + property_sheet = None + + title, subject, author, created_on, keywords, comments, template_used, \ + updated_by, edited_on, printed_on, saved_on, \ + n_pages, n_words, n_characters, \ + application = data + + result = {} + if title: result['title'] = title + if subject: result['subject'] = subject + if author: result['author'] = author + if created_on: result['created_on'] = created_on + if keywords: result['keywords'] = keywords + if comments: result['comments'] = comments + if template_used: result['template_used'] = template_used + if updated_by: result['updated_by'] = updated_by + if edited_on: result['edited_on'] = edited_on + if printed_on: result['printed_on'] = printed_on + if saved_on: result['saved_on'] = saved_on + if n_pages: result['n_pages'] = n_pages + if n_words: result['n_words'] = n_words + if n_characters: result['n_characters'] = n_characters + if application: result['application'] = application + return result + +if __name__ == '__main__': + try: + print 'Desktop =>', desktop () + print 'Common Desktop =>', desktop (1) + print 'Application Data =>', application_data () + print 'Common Application Data =>', application_data (1) + print 'Bookmarks =>', bookmarks () + print 'Common Bookmarks =>', bookmarks (1) + print 'Start Menu =>', start_menu () + print 'Common Start Menu =>', start_menu (1) + print 'Programs =>', programs () + print 'Common Programs =>', programs (1) + print 'Startup =>', startup () + print 'Common Startup =>', startup (1) + print 'My Documents =>', my_documents () + print 'Recent =>', recent () + print 'SendTo =>', sendto () + finally: + raw_input ("Press enter...") + diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4ee6753180..71529b79e9 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1023,6 +1023,28 @@ class BasicNewsRecipe(object): nmassage.extend(entity_replace) return BeautifulSoup(raw, markupMassage=nmassage) + @classmethod + def adeify_images(cls, soup): + ''' + If your recipe when converted to EPUB has problems with images when + viewed in Adobe Digital Editions, call this method from within + :method:`postprocess_html`. + ''' + for item in soup.findAll('img'): + for attrib in ['height','width','border','align','style']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + + class CustomIndexRecipe(BasicNewsRecipe): def custom_index(self): diff --git a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py index ac4ee2b860..88860bf493 100644 --- a/src/calibre/web/feeds/recipes/recipe_24sata_rs.py +++ b/src/calibre/web/feeds/recipes/recipe_24sata_rs.py @@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic ' import re from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Ser24Sata(BasicNewsRecipe): title = '24 Sata - Sr' @@ -39,14 +40,30 @@ class Ser24Sata(BasicNewsRecipe): feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + def preprocess_html(self, soup): soup.html['xml:lang'] = 'sr-Latn-RS' soup.html['lang'] = 'sr-Latn-RS' mtag = '\n' soup.head.insert(0,mtag) - return soup + return self.cleanup_image_tags(soup) def print_version(self, url): article, sep, rest = url.partition('#') - return article.replace('/show.php','/_print.php') + article_base, sep2, article_id = article.partition('id=') + return 'http://www.24sata.co.rs/_print.php?id=' + article_id diff --git a/src/calibre/web/feeds/recipes/recipe_blic.py b/src/calibre/web/feeds/recipes/recipe_blic.py index 05d4e43865..e4e4987dec 100644 --- a/src/calibre/web/feeds/recipes/recipe_blic.py +++ b/src/calibre/web/feeds/recipes/recipe_blic.py @@ -8,11 +8,12 @@ blic.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Blic(BasicNewsRecipe): - title = u'Blic' - __author__ = u'Darko Miletic' - description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' + title = 'Blic' + __author__ = 'Darko Miletic' + description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' publisher = 'RINGIER d.o.o.' category = 'news, politics, Serbia' oldest_article = 2 @@ -21,7 +22,7 @@ class Blic(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False language = _('Serbian') - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} ' html2lrf_options = [ '--comment' , description @@ -30,7 +31,7 @@ class Blic(BasicNewsRecipe): , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -44,10 +45,26 @@ class Blic(BasicNewsRecipe): start_url, question, rest_url = url.partition('?') return u'http://www.blic.rs/_print.php?' + rest_url + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + + def preprocess_html(self, soup): mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] - return soup + return self.cleanup_image_tags(soup) \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_new_yorker.py b/src/calibre/web/feeds/recipes/recipe_new_yorker.py index 628919ce71..3ebc275d99 100644 --- a/src/calibre/web/feeds/recipes/recipe_new_yorker.py +++ b/src/calibre/web/feeds/recipes/recipe_new_yorker.py @@ -42,3 +42,12 @@ class NewYorker(BasicNewsRecipe): def print_version(self, url): return url + '?printable=true' + + def postprocess_html(self, soup, x): + body = soup.find('body') + if body: + html = soup.find('html') + if html: + body.extract() + html.insert(-1, body) + return soup diff --git a/src/calibre/web/feeds/recipes/recipe_nspm.py b/src/calibre/web/feeds/recipes/recipe_nspm.py index 0ff80b8a93..360fb35c35 100644 --- a/src/calibre/web/feeds/recipes/recipe_nspm.py +++ b/src/calibre/web/feeds/recipes/recipe_nspm.py @@ -8,9 +8,10 @@ nspm.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Nspm(BasicNewsRecipe): - title = u'Nova srpska politicka misao' + title = 'Nova srpska politicka misao' __author__ = 'Darko Miletic' description = 'Casopis za politicku teoriju i drustvena istrazivanja' publisher = 'NSPM' @@ -36,7 +37,7 @@ class Nspm(BasicNewsRecipe): preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ - dict(name=['a','img','link','object','embed']) + dict(name=['link','object','embed']) ,dict(name='td', attrs={'class':'buttonheading'}) ] @@ -50,6 +51,21 @@ class Nspm(BasicNewsRecipe): def print_version(self, url): return url.replace('.html','/stampa.html') + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + oldParent = item.parent + myIndex = oldParent.contents.index(item) + item.extract() + divtag = Tag(soup,'div') + brtag = Tag(soup,'br') + oldParent.insert(myIndex,divtag) + divtag.append(item) + divtag.append(brtag) + return soup + def preprocess_html(self, soup): lng = 'sr-Latn-RS' soup.html['xml:lang'] = lng @@ -59,4 +75,4 @@ class Nspm(BasicNewsRecipe): ftag['content'] = lng for item in soup.findAll(style=True): del item['style'] - return soup + return self.cleanup_image_tags(soup) diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware.py b/src/calibre/web/feeds/recipes/recipe_tomshardware.py index 13d164d0e6..af080ccbb7 100644 --- a/src/calibre/web/feeds/recipes/recipe_tomshardware.py +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware.py @@ -63,6 +63,17 @@ class Tomshardware(BasicNewsRecipe): rind = 'http://www.tomshardware.com/review_print.php?p1=' return rind + article_id + def cleanup_image_tags(self,soup): + for item in soup.findAll('img'): + for attrib in ['height','width','border','align']: + if item.has_key(attrib): + del item[attrib] + return soup + def preprocess_html(self, soup): del(soup.body['onload']) - return soup + for item in soup.findAll(style=True): + del item['style'] + for it in soup.findAll('span'): + it.name="div" + return self.cleanup_image_tags(soup) diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 2ae705e01a..cbe048a011 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -180,8 +180,11 @@ class RecursiveFetcher(object): delta = time.time() - self.last_fetch_at if delta < self.delay: time.sleep(delta) - if re.search(r'\s+', url) is not None: - url = quote(url) + if re.search(r'\s+|,', url) is not None: + purl = list(urlparse.urlparse(url)) + for i in range(2, 6): + purl[i] = quote(purl[i]) + url = urlparse.urlunparse(purl) with self.browser_lock: try: with closing(self.browser.open(url)) as f: