This commit is contained in:
Sengian 2011-04-03 17:19:54 +02:00
commit 1ec4344baf
1582 changed files with 100541 additions and 77600 deletions

View File

@ -12,6 +12,7 @@ resources/images.qrc
resources/scripts.pickle
resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml
resources/builtin_recipes.zip
setup/installer/windows/calibre/build.log
src/calibre/translations/.errors
src/cssutils/.svn/

View File

@ -19,6 +19,216 @@
# new recipes:
# - title:
- version: 0.7.53
date: 2011-04-01
new features:
- title: "Email delivery: You can now specify a subject that calibre will use when sending emails per email account, configured in Preferences->Sending by email. The subject is a template of the same kind used in Save to Disk, etc. So you can specify the title/authors/series/whatever in the template."
tickets: [743535]
- title: "Apple driver: When an iDevice is detected, inform the user about the Connect to iTunes method instead of trying to connect directly to the device, as the latter can be buggy. See http://www.mobileread.com/forums/showthread.php?t=127883 for details"
- title: "SONY driver: Search for books on the device in all directories not just database/media/books. This can be turned off by customizing the SONY plugin in Preferences->Plugins"
- title: "EPUB Output: Remove any margins specified via an Adobe page template in the input document. This means that the margins specified in calibre are more likely to be the actual margins used."
- title: "When reading metadata from filenames, allow publisher and published date to be read from the filename"
tickets: [744020]
- title: "Remove the option to show a second tool bar from Preferences->Look & Feel. Instead go to Preferences->Toolbars and add items to the second toolbar to control exactly what is visible there."
tickets: [742686]
- title: "Add a tweak that can be used to have the calibre content server listen for IPv6 connections."
tickets: [743486]
- title: "When clicking Next or Previous in the edit metadata dialog, the active book in the main book list is also changed"
tickets: [743533]
- title: "Remember the previously used setting for Match all/Match any under the Tag Browser when calibre restarts"
tickets: [743645]
- title: "FB2 Output: Option to set the FB2 genre explicitly."
tickets: [743178]
bug fixes:
- title: "Fix text color in the search bar set to black instead of the system font color"
tickets: [746846]
- title: "Workaround for Word bug where Word uses gb2312 as the encoding when exporting Chinese docs to HTML instead of gbk"
tickets: [745428]
- title: "Make sorting on the device view faster and more robust."
tickets: [742626]
- title: "E-book viewer: Fix viewer losing place in very long single file documents when window resized."
tickets: [745001]
- title: "MOBI Output: Workaround for Amazon's MOBI renderer not rendering top margins on ul and ol tags."
tickets: [744365]
- title: "EPUB Input: Workaround for invalid EPUBs produced by someone named 'ibooks, Inc.'."
tickets: [744122]
- title: "RTF Input: Handle RTF files with too many levels of list nesting."
tickets: [743243]
improved recipes:
- Irish Times
- LifeHacker
- Estadao
- Folha de Sao Paulo
new recipes:
- title: Financieele Dagblad
author: marvin_2
- title: "Prost Amerika, WV Hooligan and SB Nation"
author: rylsfan
- title: "Cracked.com"
author: Nudgenudge
- version: 0.7.52
date: 2011-03-25
bug fixes:
- title: "Fixes a typo in 0.7.51 that broke the downloading of some news. Apologies."
tickets: [742840]
- version: 0.7.51
date: 2011-03-25
new features:
- title: "Conversion: Detect and remove fake page margins that are specified as a margin on (nearly) every paragraph."
description: "This can be turned off via an option under Structure Detection, in case it removes margins that should have been kept."
- title: "Windows build: All the python code and recipes are now put into zip files. This should decrease the amount of time the windows installer spends 'calculating free space'"
- title: "OSX and Linux: Add a setting in Preferences->Behavior to control the priority with which calibre worker processes run. This setting was already available on windows."
tickets: [741231]
- title: "Driver for HTC Thunderbolt, T-Mobile Optimus, Archos 43 and Blackberry OS6"
- title: "A new 'authors type' custom column"
- title: "When building calibre from source note that calibre now absolutely requires python >= 2.7"
- title: "Add the keyboard shortcut: Ctrl+Shift+R to restart calibre in debug mode"
bug fixes:
- title: "Fix dragging and dropping lots of books from the book list to the Tag Browser was broken"
- title: "Change the shebang in the calibre launcher script on linux to explicitly use python2 rather than python"
- title: "When adding formats do not corrupt the added file if the user tries to add an existing format to itself"
- title: "Fix drag and drop to add files that contain the # character in the filename"
- title: "Tag editor shouldn't add empty tags"
tickets: [740890]
- title: "MOBI Input: Handle MOBI files that have a too large 'number of records' field in their headers."
tickets: [740713]
- title: "News download: Update RSS feedparser module to latest version"
- title: "Various fixes to the zipfile module in calibre to handle 64 bit zipfiles and bring it up to date with the zip file module in the python stdlib"
- title: "News download: Handle titles with ASCII control codes in them."
tickets: [739322]
- title: "Make search hierarchies show simple names instead of compound ones."
- title: "Fix commas in author names being converted to pipe symbols in the book details window"
- title: "Fix PocketBook can't always find epub cover image to create thumbnail"
tickets: [9445]
improved recipes:
- "168 ora"
- "LWN weekly"
- Christian Science Monitor
- Washington Post
- West Hawaii Today
new recipes:
- title: "Planet KDE"
author: Riccardo Iaconelli
- title: "HVG"
author: Istvan Papp
- title: "Caijing Magazine"
author: Eric Chen
- version: 0.7.50
date: 2011-03-18
new features:
- title: "Add 'Read a random book' to the view menu"
- title: "Add option to show composite columns in the tag browser."
- title: "Add a tweak in Preferences->Tweaks to control where news that is automatically uploaded to a reader is sent."
tickets: [9427]
- title: "Do not also show text in composite columns when showing an icon"
- title: "Add a menu item to clear the last viewed books history in the ebook viewer"
- title: "Kobo driver: Add support for the 'Closed' collection"
- title: "Add rename/delete saved search options to Tag browser context menu"
- title: "Make searches in the tag browser a possible hierarchical field"
- title: "Allow using empty username and password when setting up an SMTP relay"
tickets: [9195]
bug fixes:
- title: "Fix regression in 0.7.49 that broke deleting of news downloads older than x days."
tickets: [9417]
- title: "Restore the ability to remove missing formats from metadata.db to the Check Library operation"
tickets: [9377]
- title: "EPUB metadata: Read ISBN from Penguin epubs that don't correctly specify it"
- title: "Conversion pipeline: Handle the case where the ncx file is incorrectly given an HTML mimetype"
- title: "Make numpad navigation keys work in viewer"
tickets: [9428]
- title: "Fix ratings not being downloaded from Amazon"
- title: "Content server: Add workaround for Internet Explorer not supporting the &apos; entity."
tickets: [9413]
- title: "Conversion pipeline: When detecting chapters/toc links from HTML normalize spaces and increase maximum TOC title length to 1000 characters from 100 characters."
tickets: [9363]
- title: "Fix regression that broke Search and Replace on custom fields"
tickets: [9397]
- title: "Fix regression that caused currently selected row to be unfocused in the device view when updating metadata"
tickets: [9395]
- title: "Conversion S&R: Do not strip leading and trailing whitespace from the search and replace expressions in the GUI"
improved recipes:
- Sports Illustrated
- Draw and Cook
new recipes:
- title: "Evangelizo.org and pro-linux.de"
author: Bobus
- title: "Office Space and Modoros"
author: Zsolt Botykai
- version: 0.7.49
date: 2011-03-11
@ -47,7 +257,7 @@
- title: "When setting covers in calibre, resize to fit within a maximum size of (1200, 1600), to prevent slowdowns due to extra large covers. This size can be controlled via Preferences->Tweaks."
tickets: [9277]
bug fixes:
- title: "Fix long standing bug that caused errors when saving books to disk if the book metadata has certain chinese/russian characters on windows. The fix required some changes to how unicode paths are handled in calibre, so it might have broken something else. If so, please open a ticket."
tickets: [7250]

View File

@ -210,21 +210,23 @@ record type usual length name comments
114 versionnumber
115 sample
116 startreading
118 retail price (as text)
119 retail price currency (as text)
201 coveroffset
202 thumboffset
117 3 adult Mobipocket Creator adds this if Adult only is checked; contents: "yes"
118 retail price As text, e.g. "4.99"
119 retail price currency As text, e.g. "USD"
201 4 coveroffset Add to first image field in Mobi Header to find PDB record containing the cover image
202 4 thumboffset Add to first image field in Mobi Header to find PDB record containing the thumbnail cover image
203 hasfakecover
204 204 Unknown
205 205 Unknown
206 206 Unknown
207 207 Unknown
208 208 Unknown
300 300 Unknown
401 clippinglimit
204 4 Creator Software Records 204-207 are usually the same for all books from a certain source, e.g. 1-6-2-41 for Baen and 201-1-0-85 for project gutenberg, 200-1-0-85 for amazon when converted to a 32 bit integer.
205 4 Creator Major Version
206 4 Creator Minor Version
207 4 Creator Build Number
208 watermark
209 tamper proof keys Used by the Kindle (and Android app) for generating book-specific PIDs.
300 fontsignature
401 1 clippinglimit
402 publisherlimit
403 403 Unknown
404 404 ttsflag
403 403 Unknown 1 - Text to Speech disabled; 0 - Text to Speech enabled
404 1 404 ttsflag
501 4 cdetype PDOC - Personal Doc;
EBOK - ebook;
502 lastupdatetime
@ -287,9 +289,9 @@ content at the beginning of the following record. The trailing entry ends with
a byte containing a count of the overlapping bytes plus additional flags.
offset bytes content comments
0 0-3 N terminal bytes
0 0-3 N terminal bytes
of a multibyte
character
character
N 1 Size & flags bits 1-2 encode N, use of bits 3-8 is unknown
@ -328,6 +330,102 @@ programs may ignore them entirely. They are stored at the end of the file itself
so the full file needs to be scanned when loaded to find them.
Image Records
-------------
If the file contains images, they follow the text blocks, with each image using a
single block. The 4096-byte record size in the PalmDoc header applies only to
text records; image records may be larger.
Magic Records
-------------
In some cases, MobiPocket Creator adds a 2-zero-byte record after the text
records in a file. This record is not included in the "record count" of text
records in the PalmDoc header, and is also not used as the "first non-book
index" in the MOBI header. (If the 2-zero-byte record is present, the index of
the following block is used as the "first non-book index".)
MobiPocket Creator also ends files with three records: 'FLIS', 'FCIS', and
'end-of-file', in that order. The 'FLIS' and 'FCIS' records do not seem to be
necessary for MobiPocket Reader or the Amazon Kindle 2 to read the file. The
'end-of-file' record might be necessary.
FLIS Record
-----------
The FLIS record appears to have a fixed value. The meaning of the values is not known.
offset bytes content comments
0 4 identifier the characters F L I S (0x46 0x4c 0x49 0x53)
4 4 ? fixed value: 8
8 2 ? fixed value: 65
10 2 ? fixed value: 0
12 4 ? fixed value: 0
16 4 ? fixed value: -1
20 2 ? fixed value: 1
22 2 ? fixed value: 3
24 4 ? fixed value: 3
28 4 ? fixed value: 1
32 4 ? fixed value: -1
FCIS Record
-----------
The FCIS record appears to have mostly fixed values.
offset bytes content comments
0 4 identifier the characters F C I S (0x46 0x43 0x49 0x53)
4 4 ? fixed value: 20
8 4 ? fixed value: 16
12 4 ? fixed value: 1
16 4 ? fixed value: 0
20 4 ? text length (the same value as "text length" in the PalmDoc header)
24 4 ? fixed value: 0
28 4 ? fixed value: 32
32 4 ? fixed value: 8
36 2 ? fixed value: 1
38 2 ? fixed value: 1
40 4 ? fixed value: 0
End-of-file Record
------------------
The end-of-file record is a fixed 4-byte record. While the last two bytes
appear to be a CRLF marker, the meaning of the first two bytes is unknown.
offset bytes content comments
0 1 ? fixed value: 233 (0xe9)
1 1 ? fixed value: 142 (0x8e)
2 1 ? fixed value: 13 (0x0d)
3 1 ? fixed value: 10 (0x0a)
SRCS Record
-----------
kindlegen creates a record whose content is a zip archive of all source files
(i.e., .opf, .ncx, .htm, .jpg, ...) given to the command and puts it in the
generated MOBI file. The record begins with the "SRCS" signature and is
located just before the #End-of-file Record.
MOBI files created with Mobipocket creator, Amazon's Personal Document Service,
or Kindle Direct Publishing (formerly Amazon DTP) don't include the SRCS record.
In the past, kindlegen had an undocumented option to suppress this record, but
the option was removed in 2010.
offset bytes content comments
0 4 identifier "SRCS" (0x53 0x52 0x43 0x53)
4 4 ? fixed value(?): 0x00000010
8 4 ? fixed value(?): 0x0000002f
12 4 ? fixed value(?): 0x00000001
16 zip The zip archive continues to the end of this record
MBP
---

View File

@ -1,8 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class DrawAndCook(BasicNewsRecipe):
title = 'DrawAndCook'
__author__ = 'Starson17'
__version__ = 'v1.10'
__date__ = '13 March 2011'
description = 'Drawings of recipes!'
language = 'en'
publisher = 'Starson17'
@ -13,6 +16,7 @@ class DrawAndCook(BasicNewsRecipe):
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg'
INDEX = 'http://www.theydrawandcook.com'
max_articles_per_feed = 30
remove_attributes = ['style', 'font']
@ -34,20 +38,21 @@ class DrawAndCook(BasicNewsRecipe):
date = ''
current_articles = []
soup = self.index_to_soup(url)
recipes = soup.findAll('div', attrs={'class': 'date-outer'})
featured_major_slider = soup.find(name='div', attrs={'id':'featured_major_slider'})
recipes = featured_major_slider.findAll('li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
for recipe in recipes:
title = recipe.h3.a.string
page_url = recipe.h3.a['href']
page_url = self.INDEX + recipe.a['href']
print 'page_url is: ', page_url
title = recipe.find('strong').string
print 'title is: ', title
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date})
return current_articles
keep_only_tags = [dict(name='h3', attrs={'class':'post-title entry-title'})
,dict(name='div', attrs={'class':'post-body entry-content'})
keep_only_tags = [dict(name='h1', attrs={'id':'page_title'})
,dict(name='section', attrs={'id':'artwork'})
]
remove_tags = [dict(name='div', attrs={'class':['separator']})
,dict(name='div', attrs={'class':['post-share-buttons']})
remove_tags = [dict(name='article', attrs={'id':['recipe_actions', 'metadata']})
]
extra_css = '''

79
recipes/caijing.recipe Normal file
View File

@ -0,0 +1,79 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Caijing(BasicNewsRecipe):
    """Recipe that downloads the current issue of Caijing Magazine.

    The recipe logs in with the user's subscription credentials, locates the
    current issue from the 2011 index page, remembers the issue cover and
    builds one feed per section of the issue.
    """

    title = 'Caijing Magazine'
    __author__ = 'Eric Chen'
    description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
Magazine has firmly established itself as a news authority and leading voice for
business and financial issues in China.
CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
developments and policy changes, as well as major events in the capital markets. It also
offers a broad international perspective through first-hand reporting on international
political and economic issues.
CAIJING Magazine is China's most widely read business and finance magazine, with a
circulation of 225,000 per issue. It boasts top-level readers from government, business
and academic circles. '''
    language = 'zh'
    category = 'news, China'
    encoding = 'UTF-8'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = True

    remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
        'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
        'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
        dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True

    # Filled in by parse_index(); get_cover_url() returns the cover found there.
    current_issue_url = ""
    current_issue_cover = ""

    # Issue/article URLs embed an ISO date; article URLs also embed a 9-digit id.
    _DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')
    _ARTICLE_ID_RE = re.compile(r'\d{9}')

    def get_browser(self):
        """Return a browser, logged in when a username and password are set."""
        # The base-class method must be called with the instance; calling it
        # without ``self`` raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://service.caijing.com.cn/usermanage/login')
            br.select_form(name='mainLoginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the list of ``(section_title, articles)`` feeds for the issue.

        Each article is a dict with ``title``, ``url`` and ``date`` keys. Links
        that do not contain both a date and a 9-digit article id are skipped.

        Raises ValueError when the index page has no link to the current issue.
        """
        index_soup = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
        div = index_soup.find('div', attrs={'class':'fmcon'})
        link = div.find('a', href=True) if div is not None else None
        if link is None:
            raise ValueError('Could not find the current issue link on the Caijing index page')
        # Store on the instance so the attribute reflects the issue in use.
        self.current_issue_url = link['href']
        soup = self.index_to_soup(self.current_issue_url)

        # The cover is an image whose URL carries the issue date; when several
        # images match, the last one wins.
        for img in soup.findAll('img', {'src' : re.compile('.')}):
            if self._DATE_RE.search(img['src']):
                self.current_issue_cover = img['src']

        feeds = []
        for section in soup.findAll('div', attrs={'class':'cebd'}):
            section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'}))
            articles = []
            for post in section.findAll('a', href=True):
                href = post['href']
                date_match = self._DATE_RE.search(href)
                id_match = self._ARTICLE_ID_RE.search(href)
                if date_match is None or id_match is None:
                    # Not an article link (or no id to request the content with).
                    continue
                date = date_match.group(0)
                article_id = id_match.group(0)
                # Replace everything from the first digit onwards with the
                # path of the full-content endpoint, then add the query.
                url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', href)
                url = url + article_id + '&time=' + date + '&cl=106&page=all'
                title = self.tag_to_string(post)
                articles.append({'title':title, 'url':url, 'date':date})
            if articles:
                feeds.append((section_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover image URL recorded by parse_index()."""
        return self.current_issue_cover

Some files were not shown because too many files have changed in this diff Show More