Merge from trunk

This commit is contained in:
Charles Haley 2011-04-16 17:02:01 +01:00
commit afba4ef2b1
15 changed files with 1298 additions and 623 deletions

View File

@ -23,7 +23,7 @@ class BigOven(BasicNewsRecipe):
, 'publisher' : publisher
, 'language' : language
}
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
@ -36,29 +36,38 @@ class BigOven(BasicNewsRecipe):
remove_attributes = ['style', 'font']
remove_tags = [dict(name='div', attrs={'class':['ppy-caption']})
,dict(name='div', attrs={'id':['float_corner']})
]
def get_article_url(self, article):
url = article.get('feedburner_origlink',article.get('link', None))
front, middle, end = url.partition('comhttp//www.bigoven.com')
url = front + 'com' + end
return url
keep_only_tags = [dict(name='div', attrs={'id':['nosidebar_main']})]
remove_tags_after = [dict(name='div', attrs={'class':['display-field']})]
remove_tags = [dict(name='ul', attrs={'class':['tabs']})]
preprocess_regexps = [
(re.compile(r'Want detailed nutrition information?', re.DOTALL), lambda match: ''),
(re.compile('\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
]
def preprocess_html(self, soup):
for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
tag.replaceWith(tag.string)
for tag in soup.findAll(name='a', text=re.compile(r'.*View Metric.*', re.DOTALL)):
tag.parent.parent.extract()
for tag in soup.findAll(name='a', text=re.compile(r'.*Add my own photo.*', re.DOTALL)):
tag.parent.parent.extract()
for tag in soup.findAll(name='div', attrs={'class':['container']}):
if tag.find(name='h1'):
continue
if tag.find(name='h2', text=re.compile(r'.*Ingredients.*', re.DOTALL)):
print 'tag found Ingred h2'
continue
if tag.find(name='h2', text=re.compile(r'Preparation.*', re.DOTALL)):
print 'tag found Prep h2'
continue
tag.extract()
tag.parent.parent.extract()
for tag in soup.findAll(text=re.compile(r'.*Try BigOven Pro for Free.*', re.DOTALL)):
tag.extract()
for tag in soup.findAll(text=re.compile(r'.*Add my photo of this recipe.*', re.DOTALL)):
tag.parent.extract()
for tag in soup.findAll(name='a', text=re.compile(r'.*photo contest.*', re.DOTALL)):
tag.parent.extract()
for tag in soup.findAll(name='a', text='Remove ads'):
tag.parent.parent.extract()
for tag in soup.findAll(name='ol', attrs={'class':['recipe-tags']}):
tag.parent.extract()
return soup
feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]
feeds = [(u'Recent Raves', u'http://www.bigoven.com/rss/recentraves'),
(u'Recipe Of The Day', u'http://feeds.feedburner.com/bigovencom-RecipeOfTheDay')]

View File

@ -18,7 +18,6 @@ class IrishTimes(BasicNewsRecipe):
oldest_article = 1.0
max_articles_per_feed = 100
no_stylesheets = True
simultaneous_downloads= 5
r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
remove_tags = [dict(name='div', attrs={'class':'footer'})]
@ -26,17 +25,17 @@ class IrishTimes(BasicNewsRecipe):
feeds = [
('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'),
('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'),
('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'),
('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'),
('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'),
('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'),
('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'),
('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'),
('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'),
('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'),
('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
@ -57,5 +56,3 @@ class IrishTimes(BasicNewsRecipe):
def get_article_url(self, article):
return article.link

View File

@ -1,12 +1,12 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
nspm.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
class Nspm(BasicNewsRecipe):
title = 'Nova srpska politicka misao'
@ -21,7 +21,6 @@ class Nspm(BasicNewsRecipe):
INDEX = 'http://www.nspm.rs/?alphabet=l'
encoding = 'utf-8'
language = 'sr'
delay = 2
remove_empty_feeds = True
publication_type = 'magazine'
masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
@ -29,27 +28,21 @@ class Nspm(BasicNewsRecipe):
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: "Times New Roman", serif1, serif}
.article_description{font-family: Arial, sans1, sans-serif}
img{margin-top:0.5em; margin-bottom: 0.7em}
img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
.author{color: #990000; font-weight: bold}
.author,.createdate{font-size: 0.9em} """
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(attrs={'id':'jsn-mainbody'})]
remove_tags = [
dict(name=['link','object','embed','script','meta','base','iframe'])
,dict(attrs={'class':'buttonheading'})
]
remove_tags_before = dict(attrs={'class':'contentheading'})
remove_tags_after = dict(attrs={'class':'article_separator'})
remove_attributes = ['width','height']
remove_tags = [dict(name=['link','script','meta','base','img'])]
remove_attributes = ['width','height','lang','xmlns:fb','xmlns:og','vspace','hspace','type','start','size']
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -57,21 +50,67 @@ class Nspm(BasicNewsRecipe):
return br
feeds = [
(u'Rubrike' , u'http://www.nspm.rs/rubrike/feed/rss.html')
,(u'Debate' , u'http://www.nspm.rs/debate/feed/rss.html')
,(u'Reci i misli' , u'http://www.nspm.rs/reci-i-misli/feed/rss.html')
(u'Rubrike' , u'http://www.nspm.rs/rubrike/feed/rss.html' )
,(u'Debate' , u'http://www.nspm.rs/debate/feed/rss.html' )
,(u'Reci i misli' , u'http://www.nspm.rs/reci-i-misli/feed/rss.html' )
,(u'Samo smeh srbina spasava', u'http://www.nspm.rs/samo-smeh-srbina-spasava/feed/rss.html')
,(u'Polemike' , u'http://www.nspm.rs/polemike/feed/rss.html')
,(u'Prikazi' , u'http://www.nspm.rs/prikazi/feed/rss.html')
,(u'Prenosimo' , u'http://www.nspm.rs/prenosimo/feed/rss.html')
,(u'Hronika' , u'http://www.nspm.rs/tabela/hronika/feed/rss.html')
,(u'Polemike' , u'http://www.nspm.rs/polemike/feed/rss.html' )
,(u'Prikazi' , u'http://www.nspm.rs/prikazi/feed/rss.html' )
,(u'Prenosimo' , u'http://www.nspm.rs/prenosimo/feed/rss.html' )
,(u'Hronika' , u'http://www.nspm.rs/tabela/hronika/feed/rss.html' )
]
def preprocess_html(self, soup):
for item in soup.body.findAll(style=True):
del item['style']
for item in soup.body.findAll('h1'):
nh = NavigableString(item.a.string)
item.a.extract()
item.insert(0,nh)
return self.adeify_images(soup)
atitle = soup.body.find('a',attrs={'class':'contentpagetitle'})
if atitle:
cleanTitle = Tag(soup,'h1',[('class','contentpagetitle')])
cnt = NavigableString(self.tag_to_string(atitle))
cleanTitle.append(cnt)
author = soup.body.find('span',attrs={'class':'author'})
if author:
author.extract()
author.name = 'div'
crdate = soup.body.find('td',attrs={'class':'createdate'})
if crdate:
cleanCrdate = Tag(soup,'div',[('class','createdate')])
cnt = NavigableString(self.tag_to_string(crdate))
cleanCrdate.append(cnt)
#get the dependent element
artText = Tag(soup,'div',[('class','text')])
textHolderp = crdate.parent
textHolder = textHolderp.nextSibling
while textHolder and (not isinstance(textHolder,Tag) or (textHolder.name <> textHolderp.name)):
textHolder = textHolder.nextSibling
if textHolder.td:
artText = textHolder.td
artText.name = 'div'
artText.attrs = []
artText['class'] = 'text'
artText.extract()
soup.body.contents=[]
soup.body.append(cleanTitle)
soup.body.append(author)
soup.body.append(cleanCrdate)
soup.body.append(artText)
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -1,4 +1,3 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
@ -19,11 +18,11 @@ class SueddeutcheZeitung(BasicNewsRecipe):
encoding = 'cp1252'
needs_subscription = True
remove_empty_feeds = True
delay = 2
delay = 1
PREFIX = 'http://www.sueddeutsche.de'
INDEX = PREFIX + '/app/epaper/textversion/'
use_embedded_content = False
masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/logo.gif'
masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif'
language = 'de'
publication_type = 'newspaper'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '
@ -36,7 +35,7 @@ class SueddeutcheZeitung(BasicNewsRecipe):
, 'linearize_tables' : True
}
remove_attributes = ['height','width']
remove_attributes = ['height','width','style']
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -50,24 +49,37 @@ class SueddeutcheZeitung(BasicNewsRecipe):
remove_tags =[
dict(attrs={'class':'hidePrint'})
,dict(name=['link','object','embed','base','iframe'])
,dict(name=['link','object','embed','base','iframe','br'])
]
keep_only_tags = [dict(attrs={'class':'artikelBox'})]
remove_tags_before = dict(attrs={'class':'artikelTitel'})
remove_tags_after = dict(attrs={'class':'author'})
feeds = [
(u'Politik' , INDEX + 'Politik/' )
,(u'Seite drei' , INDEX + 'Seite+drei/' )
,(u'Meinungsseite', INDEX + 'Meinungsseite/')
,(u'Wissen' , INDEX + 'Wissen/' )
,(u'Panorama' , INDEX + 'Panorama/' )
,(u'Feuilleton' , INDEX + 'Feuilleton/' )
,(u'Medien' , INDEX + 'Medien/' )
,(u'Wirtschaft' , INDEX + 'Wirtschaft/' )
,(u'Sport' , INDEX + 'Sport/' )
,(u'Bayern' , INDEX + 'Bayern/' )
,(u'Muenchen' , INDEX + 'M%FCnchen/' )
(u'Politik' , INDEX + 'Politik/' )
,(u'Seite drei' , INDEX + 'Seite+drei/' )
,(u'Meinungsseite' , INDEX + 'Meinungsseite/')
,(u'Wissen' , INDEX + 'Wissen/' )
,(u'Panorama' , INDEX + 'Panorama/' )
,(u'Feuilleton' , INDEX + 'Feuilleton/' )
,(u'Medien' , INDEX + 'Medien/' )
,(u'Wirtschaft' , INDEX + 'Wirtschaft/' )
,(u'Sport' , INDEX + 'Sport/' )
,(u'Bayern' , INDEX + 'Bayern/' )
,(u'Muenchen' , INDEX + 'M%FCnchen/' )
,(u'Muenchen City' , INDEX + 'M%FCnchen+City/' )
,(u'Jetzt.de' , INDEX + 'Jetzt.de/' )
,(u'Reise' , INDEX + 'Reise/' )
,(u'SZ Extra' , INDEX + 'SZ+Extra/' )
,(u'Wochenende' , INDEX + 'SZ+am+Wochenende/' )
,(u'Stellen-Markt' , INDEX + 'Stellen-Markt/')
,(u'Motormarkt' , INDEX + 'Motormarkt/')
,(u'Immobilien-Markt', INDEX + 'Immobilien-Markt/')
,(u'Thema' , INDEX + 'Thema/' )
,(u'Forum' , INDEX + 'Forum/' )
,(u'Leute' , INDEX + 'Leute/' )
,(u'Jugend' , INDEX + 'Jugend/' )
,(u'Beilage' , INDEX + 'Beilage/' )
]
def parse_index(self):

View File

@ -51,6 +51,8 @@ Run an embedded python interpreter.
'with sqlite3 works.')
parser.add_option('-p', '--py-console', help='Run python console',
default=False, action='store_true')
parser.add_option('-m', '--inspect-mobi',
help='Inspect the MOBI file at the specified path', default=None)
return parser
@ -227,6 +229,9 @@ def main(args=sys.argv):
if len(args) > 1 and os.access(args[-1], os.R_OK):
sql_dump = args[-1]
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
elif opts.inspect_mobi is not None:
from calibre.ebooks.mobi.debug import inspect_mobi
inspect_mobi(opts.inspect_mobi)
else:
from calibre import ipython
ipython()

View File

@ -108,7 +108,7 @@ class ANDROID(USBMS):
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD']
'MB860', 'MULTI-CARD', 'MID7015A']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7']

View File

@ -175,18 +175,18 @@ class EPUBInput(InputFormatPlugin):
raise ValueError(
'EPUB files with DTBook markup are not supported')
not_for_spine = set()
for y in opf.itermanifest():
id_ = y.get('id', None)
if id_ and y.get('media-type', None) in \
('application/vnd.adobe-page-template+xml',):
not_for_spine.add(id_)
for x in list(opf.iterspine()):
ref = x.get('idref', None)
if ref is None:
if ref is None or ref in not_for_spine:
x.getparent().remove(x)
continue
for y in opf.itermanifest():
if y.get('id', None) == ref and y.get('media-type', None) in \
('application/vnd.adobe-page-template+xml',):
p = x.getparent()
if p is not None:
p.remove(x)
break
with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())

View File

@ -259,6 +259,7 @@ class MetadataUpdater(object):
trail = len(new_record0.getvalue()) % 4
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
new_record0.write(pad)
new_record0.write('\0'*(1024*8))
# Rebuild the stream, update the pdbrecords pointers
self.patchSection(0,new_record0.getvalue())

View File

@ -0,0 +1,408 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, datetime
from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.langcodes import main_language, sub_language
class PalmDOCAttributes(object):
    '''Parse and pretty-print the two attribute bytes of a PalmDB header.'''

    class Attr(object):
        '''A single named flag extracted from the attributes bitfield.'''

        def __init__(self, name, field, val):
            self.name = name
            # Non-zero iff this flag's bit is set in the raw attribute value
            self.val = val & field

        def __str__(self):
            return '%s: %s'%(self.name, bool(self.val))

    def __init__(self, raw):
        # raw: the 2 byte little-endian attributes field at offset 32 of
        # the PDB header
        self.val = struct.unpack(b'<H', raw)[0]
        self.attributes = []
        # Bit masks per the Palm Database (PDB) format specification. The
        # last two flags are 0x20 ("reset after install") and 0x40 ("no
        # beam copy") -- the previous 0x12/0x14 values were incorrect, as
        # they overlapped the 0x02/0x04/0x10 flags.
        for name, field in [('Read Only', 0x02), ('Dirty AppInfoArea', 0x04),
                ('Backup this database', 0x08),
                ('Okay to install newer over existing copy, if present on PalmPilot', 0x10),
                ('Force the PalmPilot to reset after this database is installed', 0x20),
                ('Don\'t allow copy of file to be beamed to other Pilot',
                    0x40)]:
            self.attributes.append(PalmDOCAttributes.Attr(name, field,
                self.val))

    def __str__(self):
        attrs = '\n\t'.join([str(x) for x in self.attributes])
        return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
class PalmDB(object):
    '''The 78 byte PalmDB (PDB) header at the start of a MOBI/PalmDOC file.'''

    def __init__(self, raw):
        # raw: the first 78 bytes of the file
        self.raw = raw

        if self.raw.startswith(b'TPZ'):
            # Topaz books share the Kindle extensions but are a different
            # format this parser does not handle
            raise ValueError('This is a Topaz file')

        self.name = self.raw[:32].replace(b'\x00', b'')  # NUL-padded db name
        self.attributes = PalmDOCAttributes(self.raw[32:34])
        self.version = struct.unpack(b'>H', self.raw[34:36])[0]

        # PDB timestamps are seconds since the 1904-01-01 Palm/Mac epoch
        palm_epoch = datetime.datetime(1904, 1, 1, tzinfo=utc_tz)
        self.creation_date_raw = struct.unpack(b'>I', self.raw[36:40])[0]
        self.creation_date = (palm_epoch +
                datetime.timedelta(seconds=self.creation_date_raw))
        self.modification_date_raw = struct.unpack(b'>I', self.raw[40:44])[0]
        self.modification_date = (palm_epoch +
                datetime.timedelta(seconds=self.modification_date_raw))
        self.last_backup_date_raw = struct.unpack(b'>I', self.raw[44:48])[0]
        self.last_backup_date = (palm_epoch +
                datetime.timedelta(seconds=self.last_backup_date_raw))

        self.modification_number = struct.unpack(b'>I', self.raw[48:52])[0]
        self.app_info_id = self.raw[52:56]
        self.sort_info_id = self.raw[56:60]

        # type + creator together identify the content format: MOBI books
        # are BOOKMOBI, plain PalmDOC text databases are TEXTREAD
        self.type = self.raw[60:64]
        self.creator = self.raw[64:68]
        self.ident = self.type + self.creator
        if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
            raise ValueError('Unknown book ident: %r'%self.ident)

        self.uid_seed = self.raw[68:72]
        self.next_rec_list_id = self.raw[72:76]

        # Number of entries in the record-header list that follows at
        # offset 78
        self.number_of_records, = struct.unpack(b'>H', self.raw[76:78])

    def __str__(self):
        '''Multi-line human readable dump of every header field.'''
        ans = ['*'*20 + ' PalmDB Header '+ '*'*20]
        ans.append('Name: %r'%self.name)
        ans.append(str(self.attributes))
        ans.append('Version: %s'%self.version)
        ans.append('Creation date: %s (%s)'%(self.creation_date.isoformat(),
            self.creation_date_raw))
        ans.append('Modification date: %s (%s)'%(self.modification_date.isoformat(),
            self.modification_date_raw))
        ans.append('Backup date: %s (%s)'%(self.last_backup_date.isoformat(),
            self.last_backup_date_raw))
        ans.append('Modification number: %s'%self.modification_number)
        ans.append('App Info ID: %r'%self.app_info_id)
        ans.append('Sort Info ID: %r'%self.sort_info_id)
        ans.append('Type: %r'%self.type)
        ans.append('Creator: %r'%self.creator)
        ans.append('UID seed: %r'%self.uid_seed)
        ans.append('Next record list id: %r'%self.next_rec_list_id)
        ans.append('Number of records: %s'%self.number_of_records)

        return '\n'.join(ans)
class Record(object):
    '''A single PDB record: its payload bytes plus the (offset, flags, uid)
    triple taken from the file's record-header list.'''

    def __init__(self, raw, header):
        offset, flags, uid = header
        self.offset = offset
        self.flags = flags
        self.uid = uid
        self.raw = raw

    @property
    def header(self):
        '''One-line human readable summary of the record header fields.'''
        parts = ('Offset: %d' % self.offset,
                 'Flags: %d' % self.flags,
                 'UID: %d' % self.uid)
        return ' '.join(parts)
class EXTHRecord(object):
    '''A single EXTH metadata record: a numeric type id plus its raw data
    payload (decoded to an int for the types known to hold one).'''

    def __init__(self, type_, data):
        self.type = type_
        self.data = data
        # Human readable name for known EXTH types; unknown types fall
        # back to the repr of the numeric id
        self.name = {
                1 : 'DRM Server id',
                2 : 'DRM Commerce id',
                3 : 'DRM ebookbase book id',
                100 : 'author',
                101 : 'publisher',
                102 : 'imprint',
                103 : 'description',
                104 : 'isbn',
                105 : 'subject',
                106 : 'publishingdate',
                107 : 'review',
                108 : 'contributor',
                109 : 'rights',
                110 : 'subjectcode',
                111 : 'type',
                112 : 'source',
                113 : 'asin',
                114 : 'versionnumber',
                115 : 'sample',
                116 : 'startreading',
                117 : 'adult',
                118 : 'retailprice',
                119 : 'retailpricecurrency',
                201 : 'coveroffset',
                202 : 'thumboffset',
                203 : 'hasfakecover',
                204 : 'Creator Software',
                205 : 'Creator Major Version', # '>I'
                206 : 'Creator Minor Version', # '>I'
                207 : 'Creator Build Number', # '>I'
                208 : 'watermark',
                209 : 'tamper_proof_keys',
                300 : 'fontsignature',
                301 : 'clippinglimit', # percentage '>B'
                402 : 'publisherlimit',
                404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
                501 : 'cdetype', # 4 chars (PDOC or EBOK)
                502 : 'lastupdatetime',
                503 : 'updatedtitle',
        }.get(self.type, repr(self.type))

        # These record types carry a single big-endian 32 bit integer, so
        # replace the raw bytes with the decoded value
        if self.name in ('coveroffset', 'thumboffset', 'hasfakecover',
                'Creator Major Version', 'Creator Minor Version',
                'Creator Build Number', 'Creator Software', 'startreading'):
            self.data, = struct.unpack(b'>I', self.data)

    def __str__(self):
        return '%s (%d): %r'%(self.name, self.type, self.data)
class EXTHHeader(object):
    '''Parsed EXTH metadata block (follows the MOBI header in record 0).'''

    def __init__(self, raw):
        self.raw = raw
        if not self.raw.startswith(b'EXTH'):
            raise ValueError('EXTH header does not start with EXTH')
        self.length, = struct.unpack(b'>I', self.raw[4:8])
        self.count, = struct.unpack(b'>I', self.raw[8:12])

        # Records begin right after the 12 byte EXTH preamble; each record
        # advances the position by its own length field
        self.records = []
        pos = 12
        remaining = self.count
        while remaining > 0:
            pos = self.read_record(pos)
            remaining -= 1

    def read_record(self, pos):
        '''Parse one EXTH record at pos; return the offset of the next.'''
        type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
        data = self.raw[(pos+8):(pos+length)]
        self.records.append(EXTHRecord(type_, data))
        return pos + length

    def __str__(self):
        lines = ['*'*20 + ' EXTH Header '+ '*'*20]
        lines.append('EXTH header length: %d'%self.length)
        lines.append('Number of EXTH records: %d'%self.count)
        lines.append('EXTH records...')
        lines.extend(str(r) for r in self.records)
        return '\n'.join(lines)
class MOBIHeader(object):
    '''Parsed MOBI header: the contents of PDB record 0, consisting of the
    16 byte PalmDOC header, the MOBI header proper, and (optionally) the
    trailing EXTH metadata block.'''

    def __init__(self, record0):
        self.raw = record0.raw

        # --- PalmDOC portion (first 16 bytes) ---
        self.compression_raw = self.raw[:2]
        self.compression = {1: 'No compression', 2: 'PalmDoc compression',
            17480: 'HUFF/CDIC compression'}.get(struct.unpack(b'>H',
                self.compression_raw)[0],
                repr(self.compression_raw))
        self.unused = self.raw[2:4]
        self.text_length, = struct.unpack(b'>I', self.raw[4:8])
        self.number_of_text_records, self.text_record_size = \
                struct.unpack(b'>HH', self.raw[8:12])
        self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
        self.encryption_type = {0: 'No encryption',
                1: 'Old mobipocket encryption',
                2:'Mobipocket encryption'}.get(self.encryption_type_raw,
                        repr(self.encryption_type_raw))
        self.unknown = self.raw[14:16]

        # --- MOBI header proper (starts at offset 16) ---
        self.identifier = self.raw[16:20]
        if self.identifier != b'MOBI':
            raise ValueError('Identifier %r unknown'%self.identifier)

        self.length, = struct.unpack(b'>I', self.raw[20:24])
        self.type_raw, = struct.unpack(b'>I', self.raw[24:28])
        self.type = {
                2 : 'Mobipocket book',
                3 : 'PalmDOC book',
                4 : 'Audio',
                257 : 'News',
                258 : 'News Feed',
                259 : 'News magazine',
                513 : 'PICS',
                514 : 'Word',
                515 : 'XLS',
                516 : 'PPT',
                517 : 'TEXT',
                518 : 'HTML',
            }.get(self.type_raw, repr(self.type_raw))

        self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
        self.encoding = {
                1252 : 'cp1252',
                65001: 'utf-8',
            }.get(self.encoding_raw, repr(self.encoding_raw))
        self.uid = self.raw[32:36]
        # NOTE(review): missing trailing comma leaves this a 1-tuple, not
        # an int; the '%d' formatting in __str__ still works with a
        # 1-tuple so output is unaffected
        self.file_version = struct.unpack(b'>I', self.raw[36:40])
        self.reserved = self.raw[40:48]
        self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52])
        self.reserved2 = self.raw[52:80]
        self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84])
        self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88])
        self.fullname_length, = struct.unpack(b'>I', self.raw[88:92])
        self.locale_raw, = struct.unpack(b'>I', self.raw[92:96])
        # Locale packs the language id in the low byte and the
        # sub-language id starting at bit 10
        langcode = self.locale_raw
        langid    = langcode & 0xFF
        sublangid = (langcode >> 10) & 0xFF
        self.language = main_language.get(langid, 'ENGLISH')
        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
        self.input_language = self.raw[96:100]
        self.output_langauage = self.raw[100:104]
        self.min_version, = struct.unpack(b'>I', self.raw[104:108])
        self.first_image_index, = struct.unpack(b'>I', self.raw[108:112])
        self.huffman_record_offset, = struct.unpack(b'>I', self.raw[112:116])
        self.huffman_record_count, = struct.unpack(b'>I', self.raw[116:120])
        self.unknown2 = self.raw[120:128]
        self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
        # Bit 6 of exth_flags signals the presence of an EXTH block
        self.has_exth = bool(self.exth_flags & 0x40)

        # The DRM fields only exist when the header is long enough to
        # contain them
        self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
        if self.has_drm_data:
            self.unknown3 = self.raw[132:164]
            self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
            self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
            self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
            self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])

        self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
        # NOTE(review): set but never updated or read elsewhere in this
        # class -- presumably intended for later use
        self.has_fcis_flis = False
        if self.has_extra_data_flags:
            self.unknown4 = self.raw[180:192]
            self.first_content_record, self.last_content_record = \
                    struct.unpack(b'>HH', self.raw[192:196])
            self.unknown5, = struct.unpack(b'>I', self.raw[196:200])
            (self.fcis_number, self.fcis_count, self.flis_number,
                    self.flis_count) = struct.unpack(b'>IIII',
                            self.raw[200:216])
            self.unknown6 = self.raw[216:240]
            self.extra_data_flags = bin(struct.unpack(b'>I',
                self.raw[240:244])[0])
            self.primary_index_record, = struct.unpack(b'>I',
                    self.raw[244:248])

        if self.has_exth:
            # EXTH starts immediately after the MOBI header (16 byte
            # PalmDOC header + self.length)
            self.exth_offset = 16 + self.length
            self.exth = EXTHHeader(self.raw[self.exth_offset:])
            self.end_of_exth = self.exth_offset + self.exth.length
            self.bytes_after_exth = self.fullname_offset - self.end_of_exth

    def __str__(self):
        '''Multi-line human readable dump of every parsed field.'''
        ans = ['*'*20 + ' MOBI Header '+ '*'*20]
        ans.append('Compression: %s'%self.compression)
        ans.append('Unused: %r'%self.unused)
        ans.append('Number of text records: %d'%self.number_of_text_records)
        ans.append('Text record size: %d'%self.text_record_size)
        ans.append('Encryption: %s'%self.encryption_type)
        ans.append('Unknown: %r'%self.unknown)
        ans.append('Identifier: %r'%self.identifier)
        ans.append('Header length: %d'% self.length)
        ans.append('Type: %s'%self.type)
        ans.append('Encoding: %s'%self.encoding)
        ans.append('UID: %r'%self.uid)
        ans.append('File version: %d'%self.file_version)
        ans.append('Reserved: %r'%self.reserved)
        ans.append('Secondary index record: %d (null val: %d)'%(
            self.secondary_index_record, 0xffffffff))
        ans.append('Reserved2: %r'%self.reserved2)
        ans.append('First non-book record: %d'% self.first_non_book_record)
        ans.append('Full name offset: %d'%self.fullname_offset)
        ans.append('Full name length: %d bytes'%self.fullname_length)
        ans.append('Langcode: %r'%self.locale_raw)
        ans.append('Language: %s'%self.language)
        ans.append('Sub language: %s'%self.sublanguage)
        ans.append('Input language: %r'%self.input_language)
        ans.append('Output language: %r'%self.output_langauage)
        ans.append('Min version: %d'%self.min_version)
        ans.append('First Image index: %d'%self.first_image_index)
        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
        ans.append('Huffman record count: %d'%self.huffman_record_count)
        ans.append('Unknown2: %r'%self.unknown2)
        ans.append('EXTH flags: %r (%s)'%(self.exth_flags, self.has_exth))
        if self.has_drm_data:
            ans.append('Unknown3: %r'%self.unknown3)
            ans.append('DRM Offset: %s'%self.drm_offset)
            ans.append('DRM Count: %s'%self.drm_count)
            ans.append('DRM Size: %s'%self.drm_size)
            ans.append('DRM Flags: %r'%self.drm_flags)
        if self.has_extra_data_flags:
            ans.append('Unknown4: %r'%self.unknown4)
            ans.append('First content record: %d'% self.first_content_record)
            ans.append('Last content record: %d'% self.last_content_record)
            ans.append('Unknown5: %d'% self.unknown5)
            ans.append('FCIS number: %d'% self.fcis_number)
            ans.append('FCIS count: %d'% self.fcis_count)
            ans.append('FLIS number: %d'% self.flis_number)
            ans.append('FLIS count: %d'% self.flis_count)
            ans.append('Unknown6: %r'% self.unknown6)
            ans.append('Extra data flags: %r'%self.extra_data_flags)
            ans.append('Primary index record: %d'%self.primary_index_record)

        ans = '\n'.join(ans)

        if self.has_exth:
            ans += '\n\n' + str(self.exth)
            ans += '\n\nBytes after EXTH: %d'%self.bytes_after_exth
        ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
            self.fullname_length))

        ans += '\nRecord 0 length: %d'%len(self.raw)
        return ans
class MOBIFile(object):
    '''Whole-file parser: reads the stream and exposes the PDB header, the
    per-record headers, the record payloads and the MOBI header from
    record 0.'''

    def __init__(self, stream):
        self.raw = stream.read()

        # First 78 bytes are the PalmDB header
        self.palmdb = PalmDB(self.raw[:78])

        self.record_headers = []
        self.records = []
        # The record header list starts at offset 78, 8 bytes per record:
        # 4 byte offset, 1 byte flags, 3 byte unique id
        for i in xrange(self.palmdb.number_of_records):
            pos = 78 + i * 8
            offset, a1, a2, a3, a4 = struct.unpack(b'>LBBBB', self.raw[pos:pos+8])
            flags, val = a1, a2 << 16 | a3 << 8 | a4
            self.record_headers.append((offset, flags, val))

        def section(section_number):
            # A record's payload runs from its offset to the next record's
            # offset (or to end of file for the last record)
            if section_number == self.palmdb.number_of_records - 1:
                end_off = len(self.raw)
            else:
                end_off = self.record_headers[section_number + 1][0]
            off = self.record_headers[section_number][0]
            return self.raw[off:end_off]

        for i in range(self.palmdb.number_of_records):
            self.records.append(Record(section(i), self.record_headers[i]))

        self.mobi_header = MOBIHeader(self.records[0])

    def print_header(self):
        '''Dump the PalmDB header, the record header list and the MOBI
        header to stdout.'''
        # Headers may contain non-ASCII text (e.g. the book name);
        # presumably encoded here to avoid console encoding errors on
        # Python 2 -- verify on the target interpreter
        print (str(self.palmdb).encode('utf-8'))
        print ()
        print ('Record headers:')
        for i, r in enumerate(self.records):
            print ('%6d. %s'%(i, r.header))

        print ()
        print (str(self.mobi_header).encode('utf-8'))
def inspect_mobi(path_or_stream):
    '''Print the header information of the MOBI file given either as a
    filesystem path or as an already-open binary stream.'''
    if hasattr(path_or_stream, 'read'):
        stream = path_or_stream
    else:
        stream = open(path_or_stream, 'rb')
    MOBIFile(stream).print_header()
if __name__ == '__main__':
    # Command line convenience: dump the headers of the MOBI file given
    # as the first argument
    import sys
    f = MOBIFile(open(sys.argv[1], 'rb'))
    f.print_header()

View File

@ -7,8 +7,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam> and \
Kovid Goyal <kovid@kovidgoyal.net>'
from collections import defaultdict
from itertools import count
from itertools import izip
import random
import re
from struct import pack
@ -1511,7 +1509,7 @@ class MobiWriter(object):
record0.write(exth)
record0.write(title)
record0 = record0.getvalue()
self._records[0] = record0 + ('\0' * (2452 - len(record0)))
self._records[0] = record0 + ('\0' * (1024*8))
def _build_exth(self):
oeb = self._oeb
@ -1630,8 +1628,8 @@ class MobiWriter(object):
self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
offset = self._tell() + (8 * nrecords) + 2
for id, record in izip(count(), self._records):
self._write(pack('>I', offset), '\0', pack('>I', id)[1:])
for i, record in enumerate(self._records):
self._write(pack('>I', offset), '\0', pack('>I', 2*i)[1:])
offset += len(record)
self._write('\0\0')

View File

@ -43,7 +43,7 @@ class ViewAction(InterfaceAction):
ac = self.view_specific_action = QAction(_('View specific format'),
self.gui)
self.qaction.setMenu(self.view_menu)
ac.setShortcut((Qt.ControlModifier if isosx else Qt.AltModifier)+Qt.Key_V)
ac.setShortcut(Qt.AltModifier+Qt.Key_V)
ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)
ac = self.view_action = QAction(self.qaction.icon(),
self.qaction.text(), self.gui)

View File

@ -205,6 +205,7 @@ class SearchBar(QWidget): # {{{
x.setObjectName("search_option_button")
l.addWidget(x)
x.setToolTip(_("Change the way searching for books works"))
x.setVisible(False)
x = parent.saved_search = SavedSearchBox(self)
x.setMaximumSize(QSize(150, 16777215))

View File

@ -487,7 +487,13 @@ menu, choose "Validate fonts".
I downloaded the installer, but it is not working?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Downloading from the internet can sometimes result in a corrupted download. If the |app| installer you downloaded is not opening, try downloading it again. If re-downloading it does not work, download it from `an alternate location <http://sourceforge.net/projects/calibre/files/>`_. If the installer still doesn't work, then something on your computer is preventing it from running. Try rebooting your computer and running a registry cleaner like `Wise registry cleaner <http://www.wisecleaner.com>`_. Best place to ask for more help is in the `forums <http://www.mobileread.com/forums/usercp.php>`_.
Downloading from the internet can sometimes result in a corrupted download. If the |app| installer you downloaded is not opening, try downloading it again. If re-downloading it does not work, download it from `an alternate location <http://sourceforge.net/projects/calibre/files/>`_. If the installer still doesn't work, then something on your computer is preventing it from running.
* Try temporarily disabling your antivirus program (Microsoft Security Essentials, or Kaspersky or Norton or McAfee or whatever). This is most likely the culprit if the upgrade process is hanging in the middle.
* Try rebooting your computer and running a registry cleaner like `Wise registry cleaner <http://www.wisecleaner.com>`_.
* Try downloading the installer with an alternate browser. For example if you are using Internet Explorer, try using Firefox or Chrome instead.
Best place to ask for more help is in the `forums <http://www.mobileread.com/forums/forumdisplay.php?f=166>`_.
My antivirus program claims |app| is a virus/trojan?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

File diff suppressed because it is too large Load Diff

View File

@ -130,7 +130,14 @@ def utcnow():
return datetime.utcnow().replace(tzinfo=_utc_tz)
def utcfromtimestamp(stamp):
return datetime.utcfromtimestamp(stamp).replace(tzinfo=_utc_tz)
try:
return datetime.utcfromtimestamp(stamp).replace(tzinfo=_utc_tz)
except ValueError:
# Raised if stamp is out of range for the platform's gmtime function
# We print the error for debugging, but otherwise ignore it
import traceback
traceback.print_exc()
return utcnow()
def format_date(dt, format, assume_utc=False, as_utc=False):
''' Return a date formatted as a string using a subset of Qt's formatting codes '''