mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
f3794991fc
@ -12,6 +12,7 @@ resources/images.qrc
|
|||||||
resources/scripts.pickle
|
resources/scripts.pickle
|
||||||
resources/ebook-convert-complete.pickle
|
resources/ebook-convert-complete.pickle
|
||||||
resources/builtin_recipes.xml
|
resources/builtin_recipes.xml
|
||||||
|
resources/builtin_recipes.zip
|
||||||
setup/installer/windows/calibre/build.log
|
setup/installer/windows/calibre/build.log
|
||||||
src/calibre/translations/.errors
|
src/calibre/translations/.errors
|
||||||
src/cssutils/.svn/
|
src/cssutils/.svn/
|
||||||
|
204
Changelog.yaml
204
Changelog.yaml
@ -19,6 +19,210 @@
|
|||||||
# new recipes:
|
# new recipes:
|
||||||
# - title:
|
# - title:
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.7.54
|
||||||
|
date: 2011-04-08
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "New output format, HTMLZ which is a single HTML file with its associated images/stylesheets in a zipped up file"
|
||||||
|
description: "Useful when you want to convert your ebook into a single HTML file for easy editing. Note that this output plugin is still new and needs testing"
|
||||||
|
|
||||||
|
- title: "When dealing with ZIP/RAR archives, use the file header rather than the file extension to determine the file type, when possible. This fixes the common case of CBZ files being actually cbr files and vice versa"
|
||||||
|
|
||||||
|
- title: "Support for the Motorola Atrix"
|
||||||
|
|
||||||
|
- title: "Allow the icons in the toolbar to be turned off completely via Preferences->Look & Feel"
|
||||||
|
|
||||||
|
- title: "When downloading metadata use the gzip transfer encoding when possible for a speedup."
|
||||||
|
tickets: [749304]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Conversion pipeline: Workaround for bug in lxml that causes a massive mem leak on windows and OS X when the input document contains non ASCII CSS selectors."
|
||||||
|
tickets: [754555]
|
||||||
|
|
||||||
|
- title: "Conversion pipeline: Handle inline <style> tags that put all the actual CSS inside an XML comment."
|
||||||
|
tickets: [750063]
|
||||||
|
|
||||||
|
- title: "The 'Choose Library' button now shows its popup menu when you already have more than one library instead of the dialog to create a new library"
|
||||||
|
tickets: [754154]
|
||||||
|
|
||||||
|
- title: "Apply all content server settings when clicking the Start Server button in Preferences->Sharing over the net"
|
||||||
|
tickets: [753122]
|
||||||
|
|
||||||
|
- title: "Fix content server breaking if its restriction is set to a saved search that was deleted"
|
||||||
|
tickets: [751950]
|
||||||
|
|
||||||
|
- title: "Fix detection of PocketBook with 2.0.6 firmware on windows"
|
||||||
|
tickets: [750336]
|
||||||
|
|
||||||
|
- title: "ODT Input: Fix handling of the <text:s> element."
|
||||||
|
tickets: [749655]
|
||||||
|
|
||||||
|
- title: "MOBI Output: Don't use self closed tags"
|
||||||
|
|
||||||
|
- title: "Fix book details popup becoming too tall if there is a lot of metadata"
|
||||||
|
|
||||||
|
- title: "Fix new PDF engine crashing on PDF files with embedded fonts with null names"
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Kommersant
|
||||||
|
- Perfil
|
||||||
|
- Times of India
|
||||||
|
- IHT
|
||||||
|
- Guardian
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Al Ahram"
|
||||||
|
authors: Hassan Williamson
|
||||||
|
|
||||||
|
- title: "F-Secure and developpez.com"
|
||||||
|
authors: louhike
|
||||||
|
|
||||||
|
- version: 0.7.53
|
||||||
|
date: 2011-04-01
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Email delivery: You can now specify a subject that calibre will use when sending emails per email account, configured in Preferences->Sending by email. The subject is a template of the same kind used in Save to Disk, etc. So you can specify the title/authors/series/whatever in the template."
|
||||||
|
tickets: [743535]
|
||||||
|
|
||||||
|
- title: "Apple driver: When an iDevice is detected, inform the user about the Connect to iTunes method instead of trying to connect directly to the device, as the latter can be buggy. See http://www.mobileread.com/forums/showthread.php?t=127883 for details"
|
||||||
|
|
||||||
|
- title: "SONY driver: Search for books on the device in all directories not just database/media/books. This can be turned off by customizing the SONY plugin in Preferences->Plugins"
|
||||||
|
|
||||||
|
- title: "EPUB Output: Remove any margins specified via an Adobe page template in the input document. This means that the margins specified in calibre are more likely to be the actual margins used."
|
||||||
|
|
||||||
|
- title: "When reading metadata from filenames, allow publisher and published date to be read from the filename"
|
||||||
|
tickets: [744020]
|
||||||
|
|
||||||
|
- title: "Remove the option to show a second tool bar from Preferences->Look & Feel. Instead go to Preferences->Toolbars and add items to the second toolbar to control exactly what is visible there."
|
||||||
|
tickets: [742686]
|
||||||
|
|
||||||
|
- title: "Add a tweak that can be used to have the calibre content server listen for IPv6 connections."
|
||||||
|
tickets: [743486]
|
||||||
|
|
||||||
|
- title: "When clicking Next or Previous in the edit metadata dialog, the active book in the main book list is also changed"
|
||||||
|
tickets: [743533]
|
||||||
|
|
||||||
|
- title: "Remember the previously used setting for Match all/Match any under the Tag Browser when calibre restarts"
|
||||||
|
tickets: [743645]
|
||||||
|
|
||||||
|
- title: "FB2 Output: Option to set the FB2 genre explicitly."
|
||||||
|
tickets: [743178]
|
||||||
|
|
||||||
|
- title: "Plugin developers: calibre now has a new plugin API, see http://calibre-ebook.com/user_manual/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix text color in the search bar set to black instead of the system font color"
|
||||||
|
tickets: [746846]
|
||||||
|
|
||||||
|
- title: "Workaround for Word bug where Word uses gb2312 as the encoding when exporting Chinese docs to HTML instead of gbk"
|
||||||
|
tickets: [745428]
|
||||||
|
|
||||||
|
- title: "Make sorting on the device view faster and more robust."
|
||||||
|
tickets: [742626]
|
||||||
|
|
||||||
|
- title: "E-book viewer: Fix viewer losing place in very long single file documents when window resized."
|
||||||
|
tickets: [745001]
|
||||||
|
|
||||||
|
- title: "MOBI Output: Workaround for Amazon's MOBI renderer not rendering top margins on ul and ol tags."
|
||||||
|
tickets: [744365]
|
||||||
|
|
||||||
|
- title: "EPUB Input: Workaround for invalid EPUBs produced by someone named 'ibooks, Inc.'."
|
||||||
|
tickets: [744122]
|
||||||
|
|
||||||
|
- title: "RTF Input: Handle RTF files with too many levels of list nesting."
|
||||||
|
tickets: [743243]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Irish Times
|
||||||
|
- LifeHacker
|
||||||
|
- Estadao
|
||||||
|
- Folha de Sao Paulo
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Financieele Dagblad
|
||||||
|
author: marvin_2
|
||||||
|
|
||||||
|
- title: "Prost Amerika, WV Hooligan and SB Nation"
|
||||||
|
author: rylsfan
|
||||||
|
|
||||||
|
- title: "Cracked.com"
|
||||||
|
author: Nudgenudge
|
||||||
|
|
||||||
|
- version: 0.7.52
|
||||||
|
date: 2011-03-25
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fixes a typo in 0.7.51 that broke the downloading of some news. Apologies."
|
||||||
|
tickets: [742840]
|
||||||
|
|
||||||
|
- version: 0.7.51
|
||||||
|
date: 2011-03-25
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Conversion: Detect and remove fake page margins that are specified as a margin on (nearly) every paragraph."
|
||||||
|
description: "This can be turned off via an option under Structure Detection, in case it removes margins that should have been kept."
|
||||||
|
|
||||||
|
- title: "Windows build: All the python code and recipes are now put into zip files. This should decrease the amount of time the windows installer spends 'calculating free space'"
|
||||||
|
|
||||||
|
- title: "OSX and Linux: Add a setting in Preferences->Behavior to control the priority with which calibre worker processes run. This setting was already available on windows."
|
||||||
|
tickets: [741231]
|
||||||
|
|
||||||
|
- title: "Driver for HTC Thunderbolt, T-Mobile Optimus, Archos 43 and Blackberry OS6"
|
||||||
|
|
||||||
|
- title: "A new 'authors type' custom column"
|
||||||
|
|
||||||
|
- title: "When building calibre from source note that calibre now absolutely requires python >= 2.7"
|
||||||
|
|
||||||
|
- title: "Add the keyboard shortcut: Ctrl+Shift+R to restart calibre in debug mode"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix dragging and dropping lots of books from the book list to the Tag Browser was broken"
|
||||||
|
|
||||||
|
- title: "Change the shebang in the calibre launcher script on linux to explicitly use python2 rather than python"
|
||||||
|
|
||||||
|
- title: "When adding formats do not corrupt the added file if the user tries to add an existing format to itself"
|
||||||
|
|
||||||
|
- title: "Fix drag and drop to add files that contain the # character in the filename"
|
||||||
|
|
||||||
|
- title: "Tag editor shouldn't add empty tags"
|
||||||
|
tickets: [740890]
|
||||||
|
|
||||||
|
- title: "MOBI Input: Handle MOBI files that have a too large 'number of records' field in their headers."
|
||||||
|
tickets: [740713]
|
||||||
|
|
||||||
|
- title: "News download: Update RSS feedparser module to latest version"
|
||||||
|
|
||||||
|
- title: "Various fixes to the zipfile module in calibre to handle 64 bit zipfiles and bring it up to date with the zip file module in the python stdlib"
|
||||||
|
|
||||||
|
- title: "News download: Handle titles with ASCII control codes in them."
|
||||||
|
tickets: [739322]
|
||||||
|
|
||||||
|
- title: "Make search hierarchies show simple names instead of compound ones."
|
||||||
|
|
||||||
|
- title: "Fix commas in author names being converted to pipe symbols in the book details window"
|
||||||
|
|
||||||
|
- title: "Fix PocketBook can't always find epub cover image to create thumbnail"
|
||||||
|
tickets: [9445]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- "168 ora"
|
||||||
|
- "LWN weekly"
|
||||||
|
- Christian Science Monitor
|
||||||
|
- Washington Post
|
||||||
|
- West Hawaii Today
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Planet KDE"
|
||||||
|
author: Riccardo Iaconelli
|
||||||
|
|
||||||
|
- title: "HVG"
|
||||||
|
author: Istvan Papp
|
||||||
|
|
||||||
|
- title: "Caijing Magazine"
|
||||||
|
author: Eric Chen
|
||||||
|
|
||||||
|
|
||||||
- version: 0.7.50
|
- version: 0.7.50
|
||||||
date: 2011-03-18
|
date: 2011-03-18
|
||||||
|
|
||||||
|
8
INSTALL
8
INSTALL
@ -1,6 +1,9 @@
|
|||||||
calibre supports installation from source, only on Linux.
|
calibre supports installation from source, only on Linux.
|
||||||
On Windows and OS X use the provided installers and use
|
|
||||||
the facilities of the calibre-debug command to hack on the calibre source.
|
Note that you *do not* need to install from source to hack on
|
||||||
|
the calibre source code. To get started with calibre development,
|
||||||
|
use a normal calibre install and follow the instructions at
|
||||||
|
http://calibre-ebook.com/user_manual/develop.html
|
||||||
|
|
||||||
On Linux, there are two kinds of installation from source possible.
|
On Linux, there are two kinds of installation from source possible.
|
||||||
Note that both kinds require lots of dependencies as well as a
|
Note that both kinds require lots of dependencies as well as a
|
||||||
@ -45,3 +48,4 @@ This type of install can be run with the command::
|
|||||||
sudo python setup.py develop
|
sudo python setup.py develop
|
||||||
|
|
||||||
Use the -h flag for help on the develop command.
|
Use the -h flag for help on the develop command.
|
||||||
|
|
||||||
|
2
README
2
README
@ -7,7 +7,7 @@ reading. It is cross platform, running on Linux, Windows and OS X.
|
|||||||
For screenshots: https://calibre-ebook.com/demo
|
For screenshots: https://calibre-ebook.com/demo
|
||||||
|
|
||||||
For installation/usage instructions please see
|
For installation/usage instructions please see
|
||||||
http://calibre-ebook.com
|
http://calibre-ebook.com/user_manual
|
||||||
|
|
||||||
For source code access:
|
For source code access:
|
||||||
bzr branch lp:calibre
|
bzr branch lp:calibre
|
||||||
|
@ -210,21 +210,23 @@ record type usual length name comments
|
|||||||
114 versionnumber
|
114 versionnumber
|
||||||
115 sample
|
115 sample
|
||||||
116 startreading
|
116 startreading
|
||||||
118 retail price (as text)
|
117 3 adult Mobipocket Creator adds this if Adult only is checked; contents: "yes"
|
||||||
119 retail price currency (as text)
|
118 retail price As text, e.g. "4.99"
|
||||||
201 coveroffset
|
119 retail price currency As text, e.g. "USD"
|
||||||
202 thumboffset
|
201 4 coveroffset Add to first image field in Mobi Header to find PDB record containing the cover image
|
||||||
|
202 4 thumboffset Add to first image field in Mobi Header to find PDB record containing the thumbnail cover image
|
||||||
203 hasfakecover
|
203 hasfakecover
|
||||||
204 204 Unknown
|
204 4 Creator Software Records 204-207 are usually the same for all books from a certain source, e.g. 1-6-2-41 for Baen and 201-1-0-85 for project gutenberg, 200-1-0-85 for amazon when converted to a 32 bit integer.
|
||||||
205 205 Unknown
|
205 4 Creator Major Version
|
||||||
206 206 Unknown
|
206 4 Creator Minor Version
|
||||||
207 207 Unknown
|
207 4 Creator Build Number
|
||||||
208 208 Unknown
|
208 watermark
|
||||||
300 300 Unknown
|
209 tamper proof keys Used by the Kindle (and Android app) for generating book-specific PIDs.
|
||||||
401 clippinglimit
|
300 fontsignature
|
||||||
|
401 1 clippinglimit
|
||||||
402 publisherlimit
|
402 publisherlimit
|
||||||
403 403 Unknown
|
403 403 Unknown 1 - Text to Speech disabled; 0 - Text to Speech enabled
|
||||||
404 404 ttsflag
|
404 1 404 ttsflag
|
||||||
501 4 cdetype PDOC - Personal Doc;
|
501 4 cdetype PDOC - Personal Doc;
|
||||||
EBOK - ebook;
|
EBOK - ebook;
|
||||||
502 lastupdatetime
|
502 lastupdatetime
|
||||||
@ -287,9 +289,9 @@ content at the beginning of the following record. The trailing entry ends with
|
|||||||
a byte containing a count of the overlapping bytes plus additional flags.
|
a byte containing a count of the overlapping bytes plus additional flags.
|
||||||
|
|
||||||
offset bytes content comments
|
offset bytes content comments
|
||||||
0 0-3 N terminal bytes
|
0 0-3 N terminal bytes
|
||||||
of a multibyte
|
of a multibyte
|
||||||
character
|
character
|
||||||
N 1 Size & flags bits 1-2 encode N, use of bits 3-8 is unknown
|
N 1 Size & flags bits 1-2 encode N, use of bits 3-8 is unknown
|
||||||
|
|
||||||
|
|
||||||
@ -328,6 +330,102 @@ programs may ignore them entirely. They are stored at the end of the file itself
|
|||||||
so the full file needs to be scanned when loaded to find them.
|
so the full file needs to be scanned when loaded to find them.
|
||||||
|
|
||||||
|
|
||||||
|
Image Records
|
||||||
|
-------------
|
||||||
|
|
||||||
|
If the file contains images, they follow the text blocks, with each image using a
|
||||||
|
single block. The 4096-byte record size in the PalmDoc header applies only to
|
||||||
|
text records; image records may be larger.
|
||||||
|
|
||||||
|
|
||||||
|
Magic Records
|
||||||
|
-------------
|
||||||
|
|
||||||
|
In some cases, MobiPocket Creator adds a 2-zero-byte record after the text
|
||||||
|
records in a file. This record is not included in the "record count" of text
|
||||||
|
records in the PalmDoc header, and is also not used as the "first non-book
|
||||||
|
index" in the MOBI header. (If the 2-zero-byte record is present, the index of
|
||||||
|
the following block is used as the "first non-book index".)
|
||||||
|
|
||||||
|
MobiPocket Creator also ends files with three records: 'FLIS', 'FCIS', and
|
||||||
|
'end-of-file', in that order. The 'FLIS' and 'FCIS' records do not seem to be
|
||||||
|
necessary for MobiPocket Reader or the Amazon Kindle 2 to read the file. The
|
||||||
|
'end-of-file' record might be necessary.
|
||||||
|
|
||||||
|
|
||||||
|
FLIS Record
|
||||||
|
-----------
|
||||||
|
|
||||||
|
The FLIS record appears to have a fixed value. The meaning of the values is not known.
|
||||||
|
|
||||||
|
offset bytes content comments
|
||||||
|
0 4 identifier the characters F L I S (0x46 0x4c 0x49 0x53)
|
||||||
|
4 4 ? fixed value: 8
|
||||||
|
8 2 ? fixed value: 65
|
||||||
|
10 2 ? fixed value: 0
|
||||||
|
12 4 ? fixed value: 0
|
||||||
|
16 4 ? fixed value: -1
|
||||||
|
20 2 ? fixed value: 1
|
||||||
|
22 2 ? fixed value: 3
|
||||||
|
24 4 ? fixed value: 3
|
||||||
|
28 4 ? fixed value: 1
|
||||||
|
32 4 ? fixed value: -1
|
||||||
|
|
||||||
|
|
||||||
|
FCIS Record
|
||||||
|
-----------
|
||||||
|
|
||||||
|
The FCIS record appears to have mostly fixed values.
|
||||||
|
|
||||||
|
offset bytes content comments
|
||||||
|
0 4 identifier the characters F C I S (0x46 0x43 0x49 0x53)
|
||||||
|
4 4 ? fixed value: 20
|
||||||
|
8 4 ? fixed value: 16
|
||||||
|
12 4 ? fixed value: 1
|
||||||
|
16 4 ? fixed value: 0
|
||||||
|
20 4 ? text length (the same value as "text length" in the PalmDoc header)
|
||||||
|
24 4 ? fixed value: 0
|
||||||
|
28 4 ? fixed value: 32
|
||||||
|
32 4 ? fixed value: 8
|
||||||
|
36 2 ? fixed value: 1
|
||||||
|
38 2 ? fixed value: 1
|
||||||
|
40 4 ? fixed value: 0
|
||||||
|
|
||||||
|
|
||||||
|
End-of-file Record
|
||||||
|
------------------
|
||||||
|
|
||||||
|
The end-of-file record is a fixed 4-byte record. While the last two bytes
|
||||||
|
appear to be a CRLF marker, the meaning of the first two bytes is unknown.
|
||||||
|
|
||||||
|
offset bytes content comments
|
||||||
|
0 1 ? fixed value: 233 (0xe9)
|
||||||
|
1 1 ? fixed value: 142 (0x8e)
|
||||||
|
2 1 ? fixed value: 13 (0x0d)
|
||||||
|
3 1 ? fixed value: 10 (0x0a)
|
||||||
|
|
||||||
|
|
||||||
|
SRCS Record
|
||||||
|
-----------
|
||||||
|
|
||||||
|
kindlegen creates a record whose content is a zip archive of all source files
|
||||||
|
(i.e., .opf, .ncx, .htm, .jpg, ...) given to the command and puts it in the
|
||||||
|
generated MOBI file. The record begins with the "SRCS" signature and is
|
||||||
|
located just before the #End-of-file Record.
|
||||||
|
|
||||||
|
MOBI files created with Mobipocket creator, Amazon's Personal Document Service,
|
||||||
|
or Kindle Direct Publishing (formerly Amazon DTP) don't include the SRCS record.
|
||||||
|
In the past, kindlegen had an undocumented option to suppress this record, but
|
||||||
|
the option was removed in 2010.
|
||||||
|
|
||||||
|
offset bytes content comments
|
||||||
|
0 4 identifier "SRCS" (0x53 0x52 0x43 0x53)
|
||||||
|
4 4 ? fixed value(?): 0x00000010
|
||||||
|
8 4 ? fixed value(?): 0x0000002f
|
||||||
|
12 4 ? fixed value(?): 0x00000001
|
||||||
|
16 zip The zip archive continues to the end of this record
|
||||||
|
|
||||||
|
|
||||||
MBP
|
MBP
|
||||||
---
|
---
|
||||||
|
|
||||||
|
62
recipes/al_ahram.recipe
Normal file
62
recipes/al_ahram.recipe
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
# coding=utf-8
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Hassan Williamson <haz at hazrpg.co.uk>'
|
||||||
|
'''
|
||||||
|
ahram.org.eg
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AlAhram(BasicNewsRecipe):
    """Recipe for Al-Ahram (ahram.org.eg), built from the site's per-category RSS feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'Al-Ahram'
    __author__ = 'Hassan Williamson'
    description = 'News from Egypt in Arabic.'
    publisher = 'Al-Ahram'
    category = 'News'
    language = 'ar'
    publication_type = 'newsportal'

    # --- Download settings ----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf8'
    # A per-request delay is currently left disabled:
    #delay = 1

    # --- Presentation ---------------------------------------------------
    no_stylesheets = True
    # Two adjacent literals are joined by the compiler into a single string;
    # the body rule sets right-to-left direction for the Arabic text.
    extra_css = (
        ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; }'
        ' .txtTitle{ font-weight: bold; } '
    )

    # --- Page clean-up --------------------------------------------------
    # Only this container is kept from each article page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['bbcolright']}},
    ]

    # Elements stripped from the kept container.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['bbnav', 'bbsp']}},
        {'name': 'div', 'attrs': {'id': ['AddThisButton']}},
    ]

    remove_attributes = ['width', 'height']

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'الأولى', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=25'),
        (u'مصر', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=27'),
        (u'المحافظات', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=29'),
        (u'الوطن العربي', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=31'),
        (u'العالم', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=26'),
        (u'تقارير المراسلين', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=2'),
        (u'تحقيقات', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=3'),
        (u'قضايا واراء', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=4'),
        (u'اقتصاد', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=5'),
        (u'رياضة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=6'),
        (u'حوادث', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=38'),
        (u'دنيا الثقافة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=7'),
        (u'المراة والطفل', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=8'),
        (u'يوم جديد', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=9'),
        (u'الكتاب', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=10'),
        (u'الاعمدة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=11'),
        (u'أراء حرة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=59'),
        (u'ملفات الاهرام', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=12'),
        (u'بريد الاهرام', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=15'),
        (u'الاخيرة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=16'),
    ]
|
||||||
|
|
||||||
|
|
@ -3,8 +3,7 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
|
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
|
||||||
__version__ = '0.97'
|
__version__ = '0.98' # 2011-04-10
|
||||||
|
|
||||||
''' http://brandeins.de - Wirtschaftsmagazin '''
|
''' http://brandeins.de - Wirtschaftsmagazin '''
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
@ -14,8 +13,8 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
|||||||
class BrandEins(BasicNewsRecipe):
|
class BrandEins(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'brand eins'
|
title = u'brand eins'
|
||||||
__author__ = 'Constantin Hofstetter'
|
__author__ = 'Constantin Hofstetter; Steffen Siebert'
|
||||||
description = u'Wirtschaftsmagazin'
|
description = u'Wirtschaftsmagazin: Gets the last full issue on default. Set a integer value for the username-field to get older issues: 1 -> the newest (but not complete) issue, 2 -> the last complete issue (default), 3 -> the issue before 2 etc.'
|
||||||
publisher ='brandeins.de'
|
publisher ='brandeins.de'
|
||||||
category = 'politics, business, wirtschaft, Germany'
|
category = 'politics, business, wirtschaft, Germany'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
79
recipes/caijing.recipe
Normal file
79
recipes/caijing.recipe
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class Caijing(BasicNewsRecipe):
    """Recipe for Caijing Magazine, a subscription publication.

    ``parse_index`` reads the 2011 issue index page, follows the first link
    in its ``fmcon`` block to the current issue, and returns one feed per
    ``cebd`` section of that issue. The cover image URL found on the issue
    page is remembered and served by ``get_cover_url``.
    """

    title = 'Caijing Magazine'
    __author__ = 'Eric Chen'

    # Continuation lines are flush-left so that no source indentation ends
    # up inside the description text.
    description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
Magazine has firmly established itself as a news authority and leading voice for
business and financial issues in China.
CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
developments and policy changes, as well as major events in the capital markets. It also
offers a broad international perspective through first-hand reporting on international
political and economic issues.
CAIJING Magazine is China's most widely read business and finance magazine, with a
circulation of 225,000 per issue. It boasts top-level readers from government, business
and academic circles. '''
    language = 'zh'
    category = 'news, China'
    encoding = 'UTF-8'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = True

    remove_tags = [
        dict(attrs={'class': ['topad', 'nav', 'searchbox', 'connav',
                              'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
                              'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
        dict(name=['script', 'noscript', 'style']),
    ]
    no_stylesheets = True
    remove_javascript = True

    # Filled in by parse_index(); empty until the index has been parsed.
    current_issue_url = ""
    current_issue_cover = ""

    def get_browser(self):
        """Return a browser, logged in when both username and password are set."""
        # NOTE(review): called without an instance, exactly as in the original;
        # confirm against BasicNewsRecipe whether get_browser expects `self`.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://service.caijing.com.cn/usermanage/login')
            br.select_form(name='mainLoginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the feed list for the current issue.

        Returns a list of ``(section_title, articles)`` tuples, where each
        article is a dict with ``title``, ``url`` and ``date`` keys. Sections
        without any usable article link are omitted.
        """
        date_re = re.compile(r'\d{4}-\d{2}-\d{2}')
        article_id_re = re.compile(r'\d{9}')

        index_soup = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
        issue_box = index_soup.find('div', attrs={'class': 'fmcon'})
        issue_link = issue_box.find('a', href=True)
        # Store on the instance so the attribute declared on the class
        # actually reflects the issue that was parsed.
        self.current_issue_url = issue_link['href']

        soup = self.index_to_soup(self.current_issue_url)

        # The cover is taken to be an image whose URL contains a date; when
        # several images qualify, the last one wins.
        for img in soup.findAll('img', {'src': re.compile('.')}):
            if date_re.search(img['src']):
                self.current_issue_cover = img['src']

        feeds = []
        for section in soup.findAll('div', attrs={'class': 'cebd'}):
            section_title = self.tag_to_string(section.find('div', attrs={'class': 'ceti'}))
            articles = []
            for post in section.findAll('a', href=True):
                href = post['href']
                date_match = date_re.search(href)
                if date_match is None:
                    continue
                id_match = article_id_re.search(href)
                if id_match is None:
                    # A dated link without a 9-digit article id cannot be
                    # turned into a content URL; skip it instead of crashing.
                    continue
                date = date_match.group(0)

                # Replace everything from the first digit onwards with the
                # full-text endpoint, then append the article id and date.
                url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', href)
                url = url + id_match.group(0) + '&time=' + date + '&cl=106&page=all'

                title = self.tag_to_string(post)
                articles.append({'title': title, 'url': url, 'date': date})

            if articles:
                feeds.append((section_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover URL recorded by ``parse_index`` ('' if none was found)."""
        return self.current_issue_cover
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user