diff --git a/recipes/sueddeutschezeitung.recipe b/recipes/sueddeutschezeitung.recipe index 48618fe996..6aa2347b04 100644 --- a/recipes/sueddeutschezeitung.recipe +++ b/recipes/sueddeutschezeitung.recipe @@ -1,4 +1,3 @@ - __license__ = 'GPL v3' __copyright__ = '2010, Darko Miletic ' ''' @@ -19,11 +18,11 @@ class SueddeutcheZeitung(BasicNewsRecipe): encoding = 'cp1252' needs_subscription = True remove_empty_feeds = True - delay = 2 + delay = 1 PREFIX = 'http://www.sueddeutsche.de' INDEX = PREFIX + '/app/epaper/textversion/' use_embedded_content = False - masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/logo.gif' + masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif' language = 'de' publication_type = 'newspaper' extra_css = ' body{font-family: Arial,Helvetica,sans-serif} ' @@ -36,7 +35,7 @@ class SueddeutcheZeitung(BasicNewsRecipe): , 'linearize_tables' : True } - remove_attributes = ['height','width'] + remove_attributes = ['height','width','style'] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -50,24 +49,37 @@ class SueddeutcheZeitung(BasicNewsRecipe): remove_tags =[ dict(attrs={'class':'hidePrint'}) - ,dict(name=['link','object','embed','base','iframe']) + ,dict(name=['link','object','embed','base','iframe','br']) ] keep_only_tags = [dict(attrs={'class':'artikelBox'})] remove_tags_before = dict(attrs={'class':'artikelTitel'}) remove_tags_after = dict(attrs={'class':'author'}) feeds = [ - (u'Politik' , INDEX + 'Politik/' ) - ,(u'Seite drei' , INDEX + 'Seite+drei/' ) - ,(u'Meinungsseite', INDEX + 'Meinungsseite/') - ,(u'Wissen' , INDEX + 'Wissen/' ) - ,(u'Panorama' , INDEX + 'Panorama/' ) - ,(u'Feuilleton' , INDEX + 'Feuilleton/' ) - ,(u'Medien' , INDEX + 'Medien/' ) - ,(u'Wirtschaft' , INDEX + 'Wirtschaft/' ) - ,(u'Sport' , INDEX + 'Sport/' ) - ,(u'Bayern' , INDEX + 'Bayern/' ) - ,(u'Muenchen' , INDEX + 'M%FCnchen/' ) + (u'Politik' , INDEX + 'Politik/' ) + ,(u'Seite drei' , INDEX + 'Seite+drei/' ) + 
,(u'Meinungsseite' , INDEX + 'Meinungsseite/') + ,(u'Wissen' , INDEX + 'Wissen/' ) + ,(u'Panorama' , INDEX + 'Panorama/' ) + ,(u'Feuilleton' , INDEX + 'Feuilleton/' ) + ,(u'Medien' , INDEX + 'Medien/' ) + ,(u'Wirtschaft' , INDEX + 'Wirtschaft/' ) + ,(u'Sport' , INDEX + 'Sport/' ) + ,(u'Bayern' , INDEX + 'Bayern/' ) + ,(u'Muenchen' , INDEX + 'M%FCnchen/' ) + ,(u'Muenchen City' , INDEX + 'M%FCnchen+City/' ) + ,(u'Jetzt.de' , INDEX + 'Jetzt.de/' ) + ,(u'Reise' , INDEX + 'Reise/' ) + ,(u'SZ Extra' , INDEX + 'SZ+Extra/' ) + ,(u'Wochenende' , INDEX + 'SZ+am+Wochenende/' ) + ,(u'Stellen-Markt' , INDEX + 'Stellen-Markt/') + ,(u'Motormarkt' , INDEX + 'Motormarkt/') + ,(u'Immobilien-Markt', INDEX + 'Immobilien-Markt/') + ,(u'Thema' , INDEX + 'Thema/' ) + ,(u'Forum' , INDEX + 'Forum/' ) + ,(u'Leute' , INDEX + 'Leute/' ) + ,(u'Jugend' , INDEX + 'Jugend/' ) + ,(u'Beilage' , INDEX + 'Beilage/' ) ] def parse_index(self): diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 86ed3e9ef9..9c5318a5e7 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -141,7 +141,7 @@ class EXTHRecord(object): 204 : 'Creator Software', 205 : 'Creator Major Version', # '>I' 206 : 'Creator Minor Version', # '>I' - 207 : 'Creator Build number', # '>I' + 207 : 'Creator Build Number', # '>I' 208 : 'watermark', 209 : 'tamper_proof_keys', 300 : 'fontsignature', @@ -153,6 +153,11 @@ class EXTHRecord(object): 503 : 'updatedtitle', }.get(self.type, repr(self.type)) + if self.name in ('coveroffset', 'thumboffset', 'hasfakecover', + 'Creator Major Version', 'Creator Minor Version', + 'Creator Build Number', 'Creator Software', 'startreading'): + self.data, = struct.unpack(b'>I', self.data) + def __str__(self): return '%s (%d): %r'%(self.name, self.type, self.data) @@ -235,7 +240,9 @@ class MOBIHeader(object): }.get(self.encoding_raw, repr(self.encoding_raw)) self.uid = self.raw[32:36] self.file_version = struct.unpack(b'>I', self.raw[36:40]) - 
self.reserved = self.raw[40:80] + self.reserved = self.raw[40:48] + self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52]) + self.reserved2 = self.raw[52:80] self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84]) self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88]) self.fullname_length, = struct.unpack(b'>I', self.raw[88:92]) @@ -255,18 +262,28 @@ class MOBIHeader(object): self.unknown2 = self.raw[120:128] self.exth_flags, = struct.unpack(b'>I', self.raw[128:132]) self.has_exth = bool(self.exth_flags & 0x40) - self.has_drm_data = self.length >= 184 and len(self.raw) >= 184 + self.has_drm_data = self.length >= 174 and len(self.raw) >= 180 if self.has_drm_data: self.unknown3 = self.raw[132:164] self.drm_offset, = struct.unpack(b'>I', self.raw[164:168]) self.drm_count, = struct.unpack(b'>I', self.raw[168:172]) self.drm_size, = struct.unpack(b'>I', self.raw[172:176]) self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0]) - self.has_extra_data_flags = self.length >= 244 and len(self.raw) >= 244 + self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16 + self.has_fcis_flis = False if self.has_extra_data_flags: - self.unknown4 = self.raw[180:242] - self.extra_data_flags = bin(struct.unpack(b'>H', - self.raw[242:244])[0]) + self.unknown4 = self.raw[180:192] + self.first_content_record, self.last_content_record = \ + struct.unpack(b'>HH', self.raw[192:196]) + self.unknown5, = struct.unpack(b'>I', self.raw[196:200]) + (self.fcis_number, self.fcis_count, self.flis_number, + self.flis_count) = struct.unpack(b'>IIII', + self.raw[200:216]) + self.unknown6 = self.raw[216:240] + self.extra_data_flags = bin(struct.unpack(b'>I', + self.raw[240:244])[0]) + self.primary_index_record, = struct.unpack(b'>I', + self.raw[244:248]) if self.has_exth: self.exth_offset = 16 + self.length @@ -291,6 +308,9 @@ class MOBIHeader(object): ans.append('UID: %r'%self.uid) ans.append('File version: %d'%self.file_version) 
ans.append('Reserved: %r'%self.reserved) + ans.append('Secondary index record: %d (null val: %d)'%( + self.secondary_index_record, 0xffffffff)) + ans.append('Reserved2: %r'%self.reserved2) ans.append('First non-book record: %d'% self.first_non_book_record) ans.append('Full name offset: %d'%self.fullname_offset) ans.append('Full name length: %d bytes'%self.fullname_length) @@ -313,7 +333,16 @@ class MOBIHeader(object): ans.append('DRM Flags: %r'%self.drm_flags) if self.has_extra_data_flags: ans.append('Unknown4: %r'%self.unknown4) + ans.append('First content record: %d'% self.first_content_record) + ans.append('Last content record: %d'% self.last_content_record) + ans.append('Unknown5: %d'% self.unknown5) + ans.append('FCIS number: %d'% self.fcis_number) + ans.append('FCIS count: %d'% self.fcis_count) + ans.append('FLIS number: %d'% self.flis_number) + ans.append('FLIS count: %d'% self.flis_count) + ans.append('Unknown6: %r'% self.unknown6) ans.append('Extra data flags: %r'%self.extra_data_flags) + ans.append('Primary index record: %d'%self.primary_index_record) ans = '\n'.join(ans) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index ddcba63eca..2ca62f0dea 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -7,8 +7,6 @@ __copyright__ = '2008, Marshall T. 
Vandegrift and \ Kovid Goyal ' from collections import defaultdict -from itertools import count -from itertools import izip import random import re from struct import pack @@ -1630,8 +1628,8 @@ class MobiWriter(object): self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0), 'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords)) offset = self._tell() + (8 * nrecords) + 2 - for id, record in izip(count(), self._records): - self._write(pack('>I', offset), '\0', pack('>I', id)[1:]) + for i, record in enumerate(self._records): + self._write(pack('>I', offset), '\0', pack('>I', 2*i)[1:]) offset += len(record) self._write('\0\0') diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 5a2b2669bb..9e0fab00fd 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -487,7 +487,13 @@ menu, choose "Validate fonts". I downloaded the installer, but it is not working? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Downloading from the internet can sometimes result in a corrupted download. If the |app| installer you downloaded is not opening, try downloading it again. If re-downloading it does not work, download it from `an alternate location `_. If the installer still doesn't work, then something on your computer is preventing it from running. Try rebooting your computer and running a registry cleaner like `Wise registry cleaner `_. Best place to ask for more help is in the `forums `_. +Downloading from the internet can sometimes result in a corrupted download. If the |app| installer you downloaded is not opening, try downloading it again. If re-downloading it does not work, download it from `an alternate location `_. If the installer still doesn't work, then something on your computer is preventing it from running. + + * Try rebooting your computer and running a registry cleaner like `Wise registry cleaner `_. + * Try temporarily disabling your antivirus program + * Try downloading the installer with an alternate browser. 
For example, if you are using Internet Explorer, try using Firefox or Chrome instead. + +The best place to ask for more help is in the `forums `_. My antivirus program claims |app| is a virus/trojan? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~