Pull from trunk

Kovid Goyal 2010-07-09 21:54:24 -06:00
commit 8e46a15325
217 changed files with 91880 additions and 69270 deletions

View File

@ -4,6 +4,391 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.8
date: 2010-07-09
new features:
- title: "New tool to help prepare EPUBs for publication"
type: major
description: >
"calibre now contains a new command line tool called epub-fix that can automatically fix
common problems in EPUB files that cause them to be rejected by poorly designed publishing services.
The tool is plugin based for extensible functionality in the future. Currently, it can fix unmanifested files
and work around the date and SVG preserveAspectRatio bugs of epubcheck."
- title: "New icons for the toolbar buttons by Kamil Tatara"
- title: "Display rating (when available) in cover browser"
- title: "Clicking on the central cover int the cover browser now opens that book in the viewer"
- title: "Use the status bar instead of the area to the right of the location view to display status information"
- title: "Driver for the Pandigital Novel e-book reader"
bug fixes:
- title: "News download: Don not specify a font family for article descriptions"
- title: "News download: Fix regression introduced in 0.7.0 that broke download of some embedded content feeds"
- title: "MOBI Output: Partial support for nested superscript and subscripts."
tickets: [6132]
- title: "CHM Input: Fix handling of buggy CHM files with no .hhc"
tickets: [6087]
- title: "EPUB Input: Fix bug in unzipping EPUB files that have been zipped in depth first order."
tickets: [6127]
- title: "TXT Input: Convert HTML entities to characters."
tickets: [6114]
- title: "LRF Input: Handle LRF files with random null bytes in the text"
tickets: [6097]
- title: "Kobo driver: Fix detection of txt/html files on the device"
- title: "Fix opening of books when calibre library is on an unmapped network share in windows"
- title: "SONY driver: Only update the timestamp in the XML db for newly added books"
- title: "Cover browser: Fix rendering of center cover when width of cover browser is less than the width of a single cover"
- title: "Cover browser: Correct fix for setPixel out of bounds warning causing UI slowdown in calibre"
new recipes:
- title: "evz.ro"
author: Darko Miletic
- title: "Anchorage Daily News, China Economic Net, BBC Chinese and Singtao Daily"
author: rty
- title: Big Oven
author: Starson17
improved recipes:
- Haaretz
- Editor and Publisher
- Estadao
- version: 0.7.7
date: 2010-07-02
new features:
- title: "Support for the Nokia E52"
- title: "Searching on the size column"
- title: "iTunes driver: Add option to disable cover fetching for speeding up the fetching of large book collections"
bug fixes:
- title: "SONY driver: Only update metadata when books are sent to device."
- title: "TXT Input: Ensure the generated html is splittable"
tickets: [5904]
- title: "Fix infinite loop in default cover generation."
tickets: [6061]
- title: "HTML Input: Fix a parsing bug that was triggered in rare conditions"
tickets: [6064]
- title: "HTML2Zip plugin: Do not replace ligatures"
tickets: [6019]
- title: "iTunes driver: Fix transmission of non integral series numbers"
tickets: [6046]
- title: "Simplify implementation of cover caching and ensure cover browser is updated when covers are changed"
- title: "PDF metadata: Fix last character corrupted when setting metadata in encrypted files."
- title: "PDF metadata: Update the version of PoDoFo used to set metadata to 0.8.1. Hopefully that means more PDF files will work"
- title: "Device drivers: Speedup for dumping metadata cache to devices on Windows XP"
- title: "EPUB Output: Ensure that language setting is conformant to the specs"
- title: "MOBI Output: Fix a memory leak and a crash in the palmdoc compression routine"
- title: "Metadata download: Fix a regression that resulted in a failed download for some books"
new recipes:
- title: "Foreign Policy and Alo!"
author: Darko Miletic
- title: Statesman and ifzm
author: rty
improved recipes:
- Akter
- The Old New Thing
- version: 0.7.6
date: 2010-06-28
new features:
- title: "Add support for the new firmware of the Azbooka"
tickets: [5994]
- title: "A few speedups for calibre startup, should add up to a few seconds of startup time on slower machines"
- title: "Support for the Sweem MM300"
- title: "Add keyboard shorcut for Download metadata and covers"
bug fixes:
- title: "Fix regression in 0.7.5 that broke conversion of malformed HTML files (like those Microsoft Word outputs)"
type: major
tickets: [5991]
- title: "Don't download tags from librarything, as the tagging there is not very good"
- title: "Add mimetype for FB2 so that it can be served by the content server"
tickets: [6011]
- title: "Ensure cover is not resized to less than the available space in the Edit Meta Information dialog"
tickets: [6001]
- title: "SONY driver: Only update collections when sending book to device for the first time"
- title: "calibre should now work on windows when the location for the library contains non-ascii characters"
tickets: [5983]
- title: "Cover browser once again distorts instead of cropping covers that have an incorrect aspect ratio"
- title: "ISBNDb metadata plugin: Fix bug causing only first page of results to be fetched"
- title: "Move iTunes driver to the bottom so that it doesn't interfere with device detection for people that have iphones and an ereader plugged in"
improved recipes:
- Houston Chronicle
- Hindu
- Times of India
- New York Times
new recipes:
- title: Winnipeg Sun
author: rty
- version: 0.7.5
date: 2010-06-25
new features:
- title: "New driver for the Kobo featuring closer integration with the device."
- title: "Support for the Dell Streak, Eken Android tablet and the Astak Mentor EB600"
- title: "New series type custom column"
- title: "Add option in Send to device menu to connect to iTunes without any iDevice (experimental)"
- title: "iPad driver: Make setting iTunes Category from series optional. News download now optimizations for iPad output."
- title: "Add option to disable book cover animation"
tickets: [5909]
- title: "Edit meta information dialog: Remember last used size and splitter position."
tickets: [5908]
- title: "Metadata download: If any results have a published date, ensure they all do"
- title: "SONY driver: Add a preference setting in Preferences->Add/Save->Send to device to control how colelctions are managed on the device by calibre"
- title: "Metadata download: Filter out non book results. Also sort results by availability of covers for the isbn"
tickets: [5946]
- title: "Bulk editing for device collections in the device view via the context menu"
bug fixes:
- title: "When converting books using the calibre GUI, set the language of the output book to be the same as the language of the User Interface, instead of undefined. Fixes use of dictionary in iBooks"
- title: "PDF Output: Fix setting top/bottom margnis has no effect"
- title: "Conversion pipeline: Fix typo causing remove footer regex to always fail"
- title: "Handle device being yanked with queued device jobs gracefully"
- title: "Conversion pipeline: Handle deeply nested XML structures"
tickets: [5931]
- title: "Conversion pipeline: Fix handling of lists with a specified left margin"
tickets: [5877]
- title: "Restore workaround for ADE buggy rendering of anchors as links. However, make it overridable by extra CSS"
- title: "Fix LibraryThing metadata download plugin"
- title: "Fix multiple ratings displayed in Tag Browser for some legacy databases"
- title: "Fix invocation of postprocess file type plugins plugins"
- title: "HTML Input: Handle @import directives in linked css files."
tickets: [5135]
- title: "HTML Input: Handle absolute paths in resource links on windows correctly."
tickets: [3031]
- title: "E-book viewer: Handle font-face rules specify multiple families to be substituted"
- title: "Cover browser: Set aspect ratio of covers to 3:4 instead of 2:3. Crop rather than distort covers whoose aspect ratio is different from this. Antialias the rendering of the central cover"
- title: "Reset Tag browser if the text in the search box is edited"
- title: "Fix detection of SD card in Samsung Galaxy windows driver"
new recipes:
- title: "L'Osservatore Romano"
author: Darko Miletic
- title: China Press, London Free Press, People Daily
author: rty
improved recipes:
- Zaobao
- New Scientist
- National Post
- London review of books
- version: 0.7.4
date: 2010-06-19
bug fixes:
- title: "Fix regression in 0.7.3 that broke creating custom columns of rating or text types"
- title: "Fix cover browser breaking if you click on a book in the book list while cover browser is animated"
- title: "Fix a bug that could be triggered with the new book details pane if a book has a zero size cover"
tickets: [5889]
- title: "SONY driver: Fix bug preventing the editing of collections in the device view"
new recipes:
- title: Auto Prove
author: Gabriele Marini
- title: Forbes India, Maximum PC, Today Online
author: rty
improved recipes:
- WSJ
- Psychology Today
- version: 0.7.3
date: 2010-06-18
new features:
- title: "The Tag Browser now display an average rating for each item"
type: major
description: >
"
The icons of each individual item in the Tag Browser are now partially colored to indicate the average rating of
all books belonging to that category. For example, the icon next to each author is partially colored based on the
average rating of all books by that author in your calibre library. You can also hover your mouse over the item to
see the average rating in a tooltip. Can be turned off via Preferences->Interface
"
- title: "Editable author sort for each author"
type: major
description: >
"calibre has always allowed you to specify the author sort for each bookin your collection. Now you
can also specify the way the name of each individual author should be sorted. This is used to display the list
of authors in the Tag Browser and OPDS feeds in the Content Server"
- title: "When downloading metadata, also get series information from librarything.com"
type: major
tickets: [5148]
- title: "Redesign of the Book Details pane"
type: major
description: >
"The Book details pane now display covers with animation. Also instead of showing the full path to the book, you now have
clickable links to open the containing folder or individual formats. The path information is still accessible via a tooltip"
- title: "New User Interface layouts"
type: major
description: >
"calibre now has two user interface layouts selectable from Preferences->Interface. The 'wide' layout has the book details pane on the side
and the 'narrow' layout has it on the bottom. The default layout is now wide."
- title: "You can now add books directly from the device to the calibre library by right clicking on the books in the device views"
- title: "iPad driver: Create category from series preferentially, also handle series sorting"
- title: "SONY driver: Add an option to use author_sort instead of author when sending to device"
- title: "Hitting Enter in the search box now causes the search to be re-run"
tickets: [5856]
- title: "Boox driver: Make destination directory for books customizable"
- title: "Add plugin to download metadata from douban.com. Disabled by default."
- title: "OS X/linux driver for PocketBook 301"
- title: "Support for the Samsung Galaxy and Sigmatek EBK52"
- title: "On startup do not focus the search bar. Instead you can acces the search bar easily by pressing the / key or the standard search keyboard shortcut for your operating system"
bug fixes:
- title: "iPad driver: Various bug fixes"
- title: "Kobo Output profile: Adjust the screen dimensions when converting comics"
- title: "Fix using Preferences when a device is connected causes items in device menu to be disabled"
- title: "CHM Input: Skip files whoose names are too long for windows"
- title: "Brighten up calibre icon on dark backgrounds"
- title: "Ignore 'Unknown' in title/autors when downloading metadata"
tickets: [5633]
- title: "Fix regression that broke various entries in the menus - Preferences, Open containing folder and Edit metadata individually"
- title: "EPUB metadata: Handle comma separated entries in <dc:subject> tags correctly"
tickets: [5855]
- title: "MOBI Output: Fix underlines not being rendered"
tickets: [5830]
- title: "EPUB Output: Remove workaround for old versions of Adobe Digital Editions' faulty rendering of links in html. calibre no longer forces links to be blue and underlined"
- title: "Fix a bug that could cause the show pane buttons to not show hidden panes"
- title: "Fix Tag Editor does not reflect recently changed data in Tag Catagory Text Box"
tickets: [5809]
- title: "Content server: Fix sorting of books by authors instead of author_sort in the main and mobile views"
- title: "Cover cache: Resize covers larger than 600x800 in the cover cache to reduce memory consumption in the GUI"
- title: "EPUB Output: Default cover is generated is now generated as a JPEG instead of PNG32, reducing size by an order of magnitude."
tickets: [5810]
- title: "Cover Browser: Scale text size with height of cover browser. Only show a reflection of half the cover. Also restore rendering quality after regression in 0.7.1"
tickets: [5808]
- title: "Book list: Do not let the default layout have any column wider than 350 pixels"
new recipes:
- title: Akter
author: Darko Miletic
- title: Thai Rath and The Nation (Thailand)
author: Anat Ruangrassamee
improved recipes:
- Wall Street Journal
- New York Times
- Slashdot
- Publico
- Danas
- version: 0.7.2
date: 2010-06-11
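The 0.7.8 entry above says the new epub-fix tool can, among other things, repair unmanifested files, i.e. files that are present inside the EPUB archive but not listed in the OPF manifest. As a rough, generic sketch of what detecting such files involves (a minimal illustration, not calibre's implementation; the file name in the usage comment is hypothetical):

import posixpath
import zipfile
from xml.etree import ElementTree as ET

CONTAINER = 'META-INF/container.xml'
CNS = '{urn:oasis:names:tc:opendocument:xmlns:container}'
OPF = '{http://www.idpf.org/2007/opf}'

def unmanifested_files(epub_path):
    # Return archive members that the OPF manifest does not declare
    with zipfile.ZipFile(epub_path) as zf:
        # META-INF/container.xml points at the OPF package document
        container = ET.fromstring(zf.read(CONTAINER))
        opf_name = container.find(CNS + 'rootfiles/' + CNS + 'rootfile').get('full-path')
        opf_dir = posixpath.dirname(opf_name)
        opf = ET.fromstring(zf.read(opf_name))
        manifested = set()
        for item in opf.iter(OPF + 'item'):
            manifested.add(posixpath.normpath(posixpath.join(opf_dir, item.get('href'))))
        ignore = {'mimetype', opf_name}
        return [name for name in zf.namelist()
                if not name.endswith('/')
                and not name.startswith('META-INF/')
                and name not in ignore
                and posixpath.normpath(name) not in manifested]

# Usage with a hypothetical file: print(unmanifested_files('book.epub'))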

6 image files changed or added (diffs suppressed in this view)

View File

@ -1752,7 +1752,7 @@
sodipodi:cy="93.331604" sodipodi:cy="93.331604"
sodipodi:cx="-166.53223" sodipodi:cx="-166.53223"
id="path6082" id="path6082"
style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)" style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
sodipodi:type="arc" /></clipPath><radialGradient sodipodi:type="arc" /></clipPath><radialGradient
inkscape:collect="always" inkscape:collect="always"
xlink:href="#linearGradient5990" xlink:href="#linearGradient5990"
@ -2513,7 +2513,7 @@
transform="matrix(-1.7332269,0,0,1.7332269,-228.13814,-101.76485)" transform="matrix(-1.7332269,0,0,1.7332269,-228.13814,-101.76485)"
clip-path="none" /><path clip-path="none" /><path
sodipodi:type="arc" sodipodi:type="arc"
style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)" style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
id="path3915" id="path3915"
sodipodi:cx="-166.53223" sodipodi:cx="-166.53223"
sodipodi:cy="93.331604" sodipodi:cy="93.331604"
@ -2901,22 +2901,8 @@
id="g133"> id="g133">
<defs <defs
id="defs135" /> id="defs135" />
<use
id="use138"
x="0"
y="0"
width="121"
height="120" />
<clipPath <clipPath
id="XMLID_215_"> id="XMLID_215_">
<use
id="use141"
x="0"
y="0"
width="121"
height="120" />
</clipPath> </clipPath>
<g <g
clip-path="url(#XMLID_215_)" clip-path="url(#XMLID_215_)"



2 image files changed or added (diffs suppressed in this view)

resources/images/help.svg (new file, 269 lines)
View File

@ -0,0 +1,269 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 12.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 51448) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
version="1.0"
id="Livello_1"
width="128"
height="128"
viewBox="0 0 139 139"
overflow="visible"
enable-background="new 0 0 139 139"
xml:space="preserve"
sodipodi:version="0.32"
inkscape:version="0.45+devel"
sodipodi:docname="system-help.svgz"
inkscape:output_extension="org.inkscape.output.svgz.inkscape"
style="overflow:visible"><metadata
id="metadata3164"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /></cc:Work></rdf:RDF></metadata><defs
id="defs3162"><filter
inkscape:collect="always"
x="-0.132641"
width="1.265282"
y="-0.34752154"
height="1.6950431"
id="filter3547"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.7512044"
id="feGaussianBlur3549" /></filter><filter
inkscape:collect="always"
id="filter5097"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.32"
id="feGaussianBlur5099" /></filter><filter
inkscape:collect="always"
x="-0.143268"
width="1.286536"
y="-0.072184406"
height="1.1443688"
id="filter5125"><feGaussianBlur
inkscape:collect="always"
stdDeviation="1.91024"
id="feGaussianBlur5127" /></filter></defs><sodipodi:namedview
inkscape:window-height="697"
inkscape:window-width="1024"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
guidetolerance="10.0"
gridtolerance="10.0"
objecttolerance="10.0"
borderopacity="1.0"
bordercolor="#666666"
pagecolor="#ffffff"
id="base"
inkscape:zoom="2.9352518"
inkscape:cx="99.496726"
inkscape:cy="69.329657"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:current-layer="Livello_1"
height="128px"
width="128px" />
<filter
id="AI_Sfocatura_4">
<feGaussianBlur
stdDeviation="4"
id="feGaussianBlur3096" />
</filter>
<filter
id="AI_Sfocatura_2">
<feGaussianBlur
stdDeviation="2"
id="feGaussianBlur3099" />
</filter>
<radialGradient
id="XMLID_12_"
cx="69.600098"
cy="69.576698"
r="58"
gradientTransform="matrix(1,0,0,-0.1823,0,134.8566)"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
style="stop-color:#000000"
id="stop3102" />
<stop
offset="1"
style="stop-color:#000000;stop-opacity:0;"
id="stop3104" />
</radialGradient>
<circle
sodipodi:ry="58"
sodipodi:rx="58"
sodipodi:cy="69.599998"
sodipodi:cx="69.599998"
style="opacity:0.7;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter5097)"
id="circle5091"
r="58"
cy="69.599998"
cx="69.599998"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" /><ellipse
cx="69.599998"
cy="122.173"
rx="58"
ry="10.573"
id="ellipse3106"
style="opacity:0.6;fill:url(#XMLID_12_)"
sodipodi:cx="69.599998"
sodipodi:cy="122.173"
sodipodi:rx="58"
sodipodi:ry="10.573"
transform="translate(-9.9998474e-2,1.9102535)" />
<radialGradient
id="XMLID_13_"
cx="69.600098"
cy="69.600098"
r="58"
gradientUnits="userSpaceOnUse">
<stop
offset="0.6154"
style="stop-color:#EEEEEE"
id="stop3113" />
<stop
offset="0.8225"
style="stop-color:#DDDDDD"
id="stop3115" />
<stop
offset="1"
style="stop-color:#FFFFFF"
id="stop3117" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
id="circle3119"
style="fill:url(#XMLID_13_)"
sodipodi:cx="69.599998"
sodipodi:cy="69.599998"
sodipodi:rx="58"
sodipodi:ry="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" />
<linearGradient
id="XMLID_14_"
gradientUnits="userSpaceOnUse"
x1="27.6001"
y1="69.600098"
x2="111.6001"
y2="69.600098"
gradientTransform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<stop
offset="0"
style="stop-color:#2A94EC"
id="stop3122" />
<stop
offset="1"
style="stop-color:#0057AE"
id="stop3124" />
</linearGradient>
<path
d="M 26.062502,67.328127 C 26.062502,92.477355 46.522651,112.9375 71.671877,112.9375 C 96.821104,112.9375 117.28125,92.477355 117.28125,67.328127 C 117.28125,42.178901 96.821104,21.718753 71.671877,21.718753 C 46.522651,21.718753 26.062502,42.178901 26.062502,67.328127 z"
id="path3126"
style="fill:url(#XMLID_14_)" />
<g
id="circle22111"
cy="92"
rx="36"
ry="36"
cx="343.99899"
enable-background="new "
style="opacity:0.3;filter:url(#filter3547)"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<path
d="M 77.041,104.759 C 63.767,106.115 50.122,103.11 46.565,98.042 C 43.007,92.976 50.885,87.768 64.16,86.41 C 77.434,85.054 91.079,88.058 94.637,93.126 C 98.193,98.194 90.315,103.401 77.041,104.759 z"
id="path3129"
style="fill:#a8dde0" />
</g>
<linearGradient
id="circle16776_1_"
gradientUnits="userSpaceOnUse"
x1="135.5601"
y1="417.66461"
x2="161.87621"
y2="417.66461"
gradientTransform="matrix(0,1.7280523,1.7280523,0,-650.07477,-218.71693)">
<stop
offset="0"
style="stop-color:#FFFFFF"
id="stop3132" />
<stop
offset="1"
style="stop-color:#ffffff;stop-opacity:0;"
id="stop3134" />
</linearGradient>
<path
id="circle16776"
enable-background="new "
d="M 71.671877,24.06655 C 50.288682,24.06655 32.41958,38.77123 28.113838,58.349597 C 36.698174,66.142284 52.986151,54.358777 71.671877,54.358777 C 90.357604,54.358777 106.64666,66.142284 115.22991,58.349597 C 110.92417,38.77123 93.056158,24.06655 71.671877,24.06655 z"
style="opacity:0.8;fill:url(#circle16776_1_)" />
<g
id="g3137"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<defs
id="defs3139"><path
id="XMLID_10_"
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z" /></defs>
<clipPath
id="XMLID_6_">
<use
xlink:href="#XMLID_10_"
id="use3143"
x="0"
y="0"
width="139"
height="139" />
</clipPath>
<g
clip-path="url(#XMLID_6_)"
id="g3145"
style="filter:url(#AI_Sfocatura_2)">
<path
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z"
id="path3147"
style="fill:none;stroke:#00316e;stroke-width:2" />
</g>
</g>
<g
transform="matrix(1.0859375,0,0,1.1113796,-3.201342,-9.3177223)"
id="g5119"
style="fill:#00316e;filter:url(#filter5125)"><path
style="fill:#00316e"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
id="path5121" /><circle
style="fill:#00316e"
sodipodi:ry="8"
sodipodi:rx="8"
sodipodi:cy="93.599998"
sodipodi:cx="69.599998"
cx="69.599998"
cy="93.599998"
r="8"
id="circle5123" /></g><g
id="g5101"
transform="matrix(1.0859375,0,0,1.0859375,-3.201342,-8.2531233)"><path
id="path3157"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
style="fill:#ffffff" /><circle
id="circle3159"
r="8"
cy="93.599998"
cx="69.599998"
sodipodi:cx="69.599998"
sodipodi:cy="93.599998"
sodipodi:rx="8"
sodipodi:ry="8"
style="fill:#ffffff" /></g>
</svg>


11 image and binary files changed or added (diffs suppressed in this view)

View File

@ -15,7 +15,7 @@ class Akter(BasicNewsRecipe):
    category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
    oldest_article = 8
    max_articles_per_feed = 100
-   no_stylesheets = False
+   no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
@ -23,9 +23,9 @ class Akter(BasicNewsRecipe):
    publication_type = 'magazine'
    remove_empty_feeds = True
    PREFIX = 'http://www.akter.co.rs'
-   extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+   extra_css = """
    @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
-   .article_description,body,.lokacija{font-family: Arial,Helvetica,sans1,sans-serif}
+   .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
    .color-2{display:block; margin-bottom: 10px; padding: 5px, 10px;
    border-left: 1px solid #D00000; color: #D00000}
    img{margin-bottom: 0.8em} """

View File

@ -0,0 +1,65 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.alo.rs
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Alo_Novine(BasicNewsRecipe):
title = 'Alo!'
__author__ = 'Darko Miletic'
description = "News Portal from Serbia"
publisher = 'Alo novine d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
delay = 4
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'sr'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
.lead {font-size: 1.3em}
h1{color: #DB0700}
.article_uvod{font-style: italic; font-size: 1.2em}
img{margin-bottom: 0.8em} """
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name=['object','link','embed'])]
remove_attributes = ['height','width']
feeds = [
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
,(u'Politika' , u'http://www.alo.rs/rss/politika')
,(u'Vesti' , u'http://www.alo.rs/rss/vesti')
,(u'Sport' , u'http://www.alo.rs/rss/sport')
,(u'Ljudi' , u'http://www.alo.rs/rss/ljudi')
,(u'Saveti' , u'http://www.alo.rs/rss/saveti')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
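# The article id is the next-to-last path segment of the URL; use it to request the print-friendly template.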
artl = url.rpartition('/')[0]
artid = artl.rpartition('/')[2]
return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid
def image_url_processor(self, baseurl, url):
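# Some feed items contain a doubled slash in image URLs; collapse it before downloading.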
return url.replace('alo.rs//','alo.rs/')

View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
title = u'Anchorage Daily News'
__author__ = 'rty'
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'),
(u'Business', u'http://www.adn.com/money/index.xml'),
(u'Sports', u'http://www.adn.com/sports/index.xml'),
(u'Politics', u'http://www.adn.com/politics/index.xml'),
(u'Lifestyles', u'http://www.adn.com/life/index.xml'),
(u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
]
description = ''''Alaska's Newspaper'''
publisher = 'http://www.adn.com'
category = 'news, Alaska, Anchorage'
language = 'en'
extra_css = '''
p{font-weight: normal;text-align: justify}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'latin-1'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
keep_only_tags = [
dict(name='div', attrs={'class':'left_col story_mainbar'}),
]
remove_tags = [
dict(name='div', attrs={'class':'story_tools'}),
dict(name='p', attrs={'class':'ad_label'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'advertisement'}),
]

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'GabrieleMarini, based on Darko Miletic'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
__version__ = 'v1.02 Marini Gabriele '
__date__ = '10, January 2010'
__description__ = 'Italian daily newspaper'
'''
http://www.corrieredellosport.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AutoPR(BasicNewsRecipe):
__author__ = 'Gabriele Marini'
description = 'Auto and Formula 1'
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
title = u'Auto Prove'
publisher = 'CONTE Editore'
category = 'Sport'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 60
max_articles_per_feed = 20
use_embedded_content = False
recursion = 100
remove_javascript = True
no_stylesheets = True
#html2lrf_options = [
# '--comment', description
# , '--category', category
# , '--publisher', publisher
# , '--ignore-tables'
# ]
#html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [
dict(name='h2', attrs={'class':['tit_Article y_Txt']}),
dict(name='h2', attrs={'class':['tit_Article']}),
dict(name='div', attrs={'class':['box_Img newsdet_new ']}),
dict(name='div', attrs={'class':['box_Img newsdet_as ']}),
dict(name='table', attrs={'class':['table_A']}),
dict(name='div', attrs={'class':['txt_Article txtBox_cms']}),
dict(name='testoscheda')]
def parse_index(self):
feeds = []
for title, url in [
("Prove su Strada" , "http://www.auto.it/rss/prove+6.xml")
]:
soup = self.index_to_soup(url)
soup = soup.find('channel')
print soup
for article in soup.findAllNext('item'):
title = self.tag_to_string(article.title)
date = self.tag_to_string(article.pubDate)
description = self.tag_to_string(article.description)
link = self.tag_to_string(article.guid)
# print article
articles = self.create_links_append(link, date, description)
if articles:
feeds.append((title, articles))
return feeds
def create_links_append(self, link, date, description):
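# Each road test is split across several sub-pages (design, interni, tecnica, ...); build one article entry per sub-page by rewriting the base 'scheda' link.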
current_articles = []
current_articles.append({'title': 'Generale', 'url': link,'description':description, 'date':date}),
current_articles.append({'title': 'Design', 'url': link.replace('scheda','design'),'description':'scheda', 'date':''}),
current_articles.append({'title': 'Interni', 'url': link.replace('scheda','interni'),'description':'Interni', 'date':''}),
current_articles.append({'title': 'Tecnica', 'url': link.replace('scheda','tecnica'),'description':'Tecnica', 'date':''}),
current_articles.append({'title': 'Su Strada', 'url': link.replace('scheda','su_strada'),'description':'Su Strada', 'date':''}),
current_articles.append({'title': 'Pagella', 'url': link.replace('scheda','pagella'),'description':'Pagella', 'date':''}),
current_articles.append({'title': 'Rilevamenti', 'url': link.replace('scheda','telemetria'),'description':'Rilevamenti', 'date':''})
return current_articles

View File

@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
title = u'BBC Chinese'
oldest_article = 7
max_articles_per_feed = 100
feeds = [
(u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
(u'\u4e24\u5cb8\u4e09\u5730', u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
(u'\u91d1\u878d\u8d22\u7ecf', u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
(u'\u7f51\u4e0a\u4e92\u52a8', u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
(u'\u97f3\u89c6\u56fe\u7247', u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
(u'\u5206\u6790\u8bc4\u8bba', u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
]
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'British Broadcasting Corporation'
description = 'BBC news in Chinese'
category = 'News, Chinese'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
keep_only_tags = [
dict(name='h1'),
dict(name='p', attrs={'class':['primary-topic','summary']}),
dict(name='div', attrs={'class':['bodytext','datestamp']}),
]

View File

@ -0,0 +1,64 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BigOven(BasicNewsRecipe):
title = 'BigOven'
__author__ = 'Starson17'
description = 'Recipes for the Foodie in us all. Registration is free. A fake username and password just gives smaller photos.'
language = 'en'
category = 'news, food, recipes, gourmet'
publisher = 'Starson17'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.software.com/images/products/BigOven%20Logo_177_216.JPG'
max_articles_per_feed = 30
needs_subscription = True
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
def get_browser(self):
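# Log in to bigoven.com with the configured credentials; per the description above, a registered login yields larger recipe photos.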
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.bigoven.com/')
br.select_form(name='form1')
br['TopMenu_bo1$email'] = self.username
br['TopMenu_bo1$password'] = self.password
br.submit()
return br
remove_attributes = ['style', 'font']
keep_only_tags = [dict(name='h1')
,dict(name='div', attrs={'class':'img'})
,dict(name='div', attrs={'id':'intro'})
]
remove_tags = [dict(name='div', attrs={'style':["overflow: visible;"]})
,dict(name='div', attrs={'class':['ctas']})
#,dict(name='a', attrs={'class':['edit']})
,dict(name='p', attrs={'class':['byline']})
]
feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]
def preprocess_html(self, soup):
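# Drop the blocks that hold 'edit' links and flatten 'deflink' anchors to plain text.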
for tag in soup.findAll(name='a', attrs={'class':['edit']}):
tag.parent.extract()
for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
tag.replaceWith(tag.string)
return soup
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:medium;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278162597(BasicNewsRecipe):
__author__ = 'rty'
title = u'China Economic Net'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'www.ce.cn - China Economic net - Beijing'
description = 'China Economic Net Magazine'
category = 'Economic News Magazine, Chinese, China'
feeds = [
(u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
(u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),
(u'Health 健康', u'http://www.ce.cn/health/index_6294.xml'),
(u'Technology 科技', u'http://sci.ce.cn/mainpage/index_6307.xml'),
(u'Domestic Politics 国内时政', u'http://www.ce.cn/xwzx/gnsz/index_6273.xml')
]
masthead_url = 'http://finance.ce.cn/images/08mdy_logo.gif'
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh-cn'
encoding = 'gb2312'
conversion_options = {'linearize_tables':True}
keep_only_tags = [
dict(name='h1', attrs={'id':'articleTitle'}),
dict(name='div', attrs={'class':'laiyuan'}),
dict(name='div', attrs={'id':'articleText'}),
]

View File

@ -0,0 +1,71 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277228948(BasicNewsRecipe):
title = u'China Press USA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'www.chinapressusa.com'
description = 'Overseas Chinese Network Newspaper in the USA'
category = 'News in Chinese, USA'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
#encoding = 'GB2312'
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url ='http://www.chinapressusa.com/common/images/logo.gif'
extra_css = '''
@font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {
margin-right: 8pt;
font-family: 'DroidFont', serif;}
h1 {font-family: 'DroidFont', serif, sans-serif}
.show {font-family: 'DroidFont', serif, sans-serif}
'''
feeds = [
(u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
(u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
(u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
]
keep_only_tags = [
dict(name='div', attrs={'class':'show'}),
]
remove_tags = [
# dict(name='table', attrs={'class':'xle'}),
dict(name='div', attrs={'class':'time'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bank17'}),
# dict(name='a', attrs={'class':'ab12'}),
]
def append_page(self, soup, appendtag, position):
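# Follow the 'displaypagenum' pager recursively and splice the body of each continuation page into the article.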
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':'show'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
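# Declare the zh-CN charset, strip inline styles, pull in any continuation pages, then remove the pager block.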
mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
self.append_page(soup, soup.body, 3)
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
pager.extract()
return soup

View File

@ -1,14 +1,29 @@
-import re
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2010 elsuave'
 from calibre.web.feeds.news import BasicNewsRecipe
 class EandP(BasicNewsRecipe):
     title = u'Editor and Publisher'
-    __author__ = u'Xanthan Gum'
+    __author__ = u'elsuave (modified from Xanthan Gum)'
     description = 'News about newspapers and journalism.'
+    publisher = 'Editor and Publisher'
+    category = 'news, journalism, industry'
     language = 'en'
-    no_stylesheets = True
-    oldest_article = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 25
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf8'
+    cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
+    remove_javascript = True
+    html2lrf_options = [
+        '--comment', description
+        , '--category', category
+        , '--publisher', publisher
+    ]
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
     # Font formatting code borrowed from kwetal
@ -18,17 +33,21 @ class EandP(BasicNewsRecipe):
     h2{font-size: large;}
     '''
-    # Delete everything before the article
-    remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
-    # Delete everything after the article
-    preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
-    lambda match: '</body>'),]
-    feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'),
-             (u'Business News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'),
-             (u'Ad/Circ News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'),
-             (u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'),
-             (u'Technology News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'),
-             (u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')]
+    # Keep only div:itemmgap
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'itemmgap'})
+    ]
+    # Remove commenting/social media links
+    remove_tags_after = [dict(name='div', attrs={'class':'clear'})]
+    feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
+             (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
+             (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
+             (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
+             (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]

View File

@ -1,7 +1,7 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010, elsuave'
 '''
 estadao.com.br
 '''
@ -10,12 +10,12 @ from calibre.web.feeds.news import BasicNewsRecipe
 class Estadao(BasicNewsRecipe):
     title = 'O Estado de S. Paulo'
-    __author__ = 'Darko Miletic'
+    __author__ = 'elsuave (modified from Darko Miletic)'
     description = 'News from Brasil in Portuguese'
     publisher = 'O Estado de S. Paulo'
     category = 'news, politics, Brasil'
     oldest_article = 2
-    max_articles_per_feed = 100
+    max_articles_per_feed = 25
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf8'
@ -30,13 +30,14 @ class Estadao(BasicNewsRecipe):
     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-    keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['bb-md-noticia','c5']})
+    ]
     remove_tags = [
         dict(name=['script','object','form','ul'])
-        ,dict(name='div', attrs={'id':['votacao','estadaohoje']})
-        ,dict(name='p', attrs={'id':'ctrl_texto'})
-        ,dict(name='p', attrs={'class':'texto'})
+        ,dict(name='div', attrs={'class':['fnt2 Color_04 bold','right fnt2 innerTop15 dvTmFont','™_01 right outerLeft15','tituloBox','tags']})
+        ,dict(name='div', attrs={'id':['bb-md-noticia-subcom']})
     ]
     feeds = [
@ -51,13 +52,12 @ class Estadao(BasicNewsRecipe):
     ,(u'Vida &', u'http://www.estadao.com.br/rss/vidae.xml')
     ]
-    def preprocess_html(self, soup):
-        ifr = soup.find('iframe')
-        if ifr:
-            ifr.extract()
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
     language = 'pt'
+    def get_article_url(self, article):
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if '/Multimidia/' not in url:
+            return url

View File

@ -0,0 +1,52 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
evz.ro
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class EVZ_Ro(BasicNewsRecipe):
title = 'evz.ro'
__author__ = 'Darko Miletic'
description = 'News from Romania'
publisher = 'evz.ro'
category = 'news, politics, Romania'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'ro'
masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
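# Reduce the page <head> to just its <title> element before parsing (see preprocess_regexps below).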
preprocess_regexps = [
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
]
remove_tags = [
dict(name=['form','embed','iframe','object','base','link','script','noscript'])
,dict(attrs={'class':['section','statsInfo','email il']})
,dict(attrs={'id' :'gallery'})
]
remove_tags_after = dict(attrs={'class':'section'})
keep_only_tags = [dict(attrs={'class':'single'})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,55 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1276934715(BasicNewsRecipe):
title = u'Forbes India'
__author__ = 'rty'
description = 'India Edition Forbes'
publisher = 'Forbes India'
category = 'Business News, Economy, India'
oldest_article = 7
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_IN'
temp_files = []
articles_are_obfuscated = True
conversion_options = {'linearize_tables':True}
feeds = [
(u'Contents', u'http://business.in.com/rssfeed/rss_all.xml'),
]
extra_css = '''
.t-10-gy-l{font-style: italic; font-size: small}
.t-30-b-d{font-weight: bold; font-size: xx-large}
.t-16-gy-l{font-weight: bold; font-size: x-large; font-style: italic}
.storycontent{font-size: 4px;font-family: Times New Roman;}
'''
remove_tags_before = dict(name='div', attrs={'class':'pdl10 pdr15'})
def get_obfuscated_article(self, url):
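# Articles are fetched via their print view: follow the /printcontent/ link and give calibre the saved copy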
br = self.get_browser()
br.open(url)
response = br.follow_link(url_regex = r'/printcontent/[0-9]+', nr = 0)
html = response.read()
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
def get_cover_url(self):
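# Use the first matching cover link on the magazine index page as the issue cover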
index = 'http://business.in.com/magazine/'
soup = self.index_to_soup(index)
for image in soup.findAll('a',{ "class" : "lbOn a-9-b-d" }):
return image['href']
#return image['href'] + '.jpg'
return None
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup

View File

@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.foreignpolicy.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ForeignPolicy(BasicNewsRecipe):
title = 'Foreign Policy'
__author__ = 'Darko Miletic'
description = 'International News'
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
category = 'news, politics, USA'
oldest_article = 31
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif } img{margin-bottom: 0.4em} h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(attrs={'id':['art-mast','art-body','auth-bio']})]
remove_tags = [dict(name='iframe'),dict(attrs={'id':['share-box','base-ad']})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.foreignpolicy.com/node/feed')]
def print_version(self, url):
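# Request the single-page, print-friendly rendition of each article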
return url + '?print=yes&page=full'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,56 +1,95 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
haaretz.com www.haaretz.com
''' '''
import re
from calibre import strftime
from time import gmtime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Haaretz_en(BasicNewsRecipe): class HaaretzPrint_en(BasicNewsRecipe):
title = 'Haaretz in English' title = 'Haaretz - print edition'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Haaretz.com, the online edition of Haaretz Newspaper in Israel, and analysis from Israel and the Middle East. Haaretz.com provides extensive and in-depth coverage of Israel, the Jewish World and the Middle East, including defense, diplomacy, the Arab-Israeli conflict, the peace process, Israeli politics, Jerusalem affairs, international relations, Iran, Iraq, Syria, Lebanon, the Palestinian Authority, the West Bank and the Gaza Strip, the Israeli business world and Jewish life in Israel and the Diaspora. ' description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
publisher = 'haaretz.com' publisher = 'Haaretz'
category = 'news, politics, Israel' category = "news, Haaretz, Israel news, Israel newspapers, Israel business news, Israel financial news, Israeli news,Israeli newspaper, Israeli newspapers, news from Israel, news in Israel, news Israel, news on Israel, newspaper Israel, Israel sports news, Israel diplomacy news"
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'en_IL' language = 'en_IL'
publication_type = 'newspaper' publication_type = 'newspaper'
remove_empty_feeds = True PREFIX = 'http://www.haaretz.com'
masthead_url = 'http://www.haaretz.com/images/logos/logoGrey.gif' masthead_url = PREFIX + '/images/logos/logoGrey.gif'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } ' extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } '
preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')]
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'tags' : category , 'tags' : category
, 'publisher' : publisher , 'publisher': publisher
, 'language' : language , 'language' : language
} }
remove_tags = [dict(name='div', attrs={'class':['rightcol']}),dict(name='table')] keep_only_tags = [dict(attrs={'id':'threecolumns'})]
remove_tags_before = dict(name='h1') remove_attributes = ['width','height']
remove_tags_after = dict(attrs={'id':'innerArticle'}) remove_tags = [
keep_only_tags = [dict(attrs={'id':'content'})] dict(name=['iframe','link','object','embed'])
,dict(name='div',attrs={'class':'rightcol'})
]
feeds = [ feeds = [
(u'Opinion' , u'http://www.haaretz.com/cmlink/opinion-rss-1.209234?localLinksEnabled=false' ) (u'News' , PREFIX + u'/print-edition/news' )
,(u'Defense and diplomacy' , u'http://www.haaretz.com/cmlink/defense-and-diplomacy-rss-1.208894?localLinksEnabled=false') ,(u'Opinion' , PREFIX + u'/print-edition/opinion' )
,(u'National' , u'http://www.haaretz.com/cmlink/national-rss-1.208896?localLinksEnabled=false' ) ,(u'Business' , PREFIX + u'/print-edition/business' )
,(u'International' , u'http://www.haaretz.com/cmlink/international-rss-1.208898?localLinksEnabled=false' ) ,(u'Real estate' , PREFIX + u'/print-edition/real-estate' )
,(u'Jewish World' , u'http://www.haaretz.com/cmlink/jewish-world-rss-1.209085?localLinksEnabled=false' ) ,(u'Sports' , PREFIX + u'/print-edition/sports' )
,(u'Business' , u'http://www.haaretz.com/cmlink/business-print-rss-1.264904?localLinksEnabled=false' ) ,(u'Travel' , PREFIX + u'/print-edition/travel' )
,(u'Real Estate' , u'http://www.haaretz.com/cmlink/real-estate-print-rss-1.264977?localLinksEnabled=false' ) ,(u'Books' , PREFIX + u'/print-edition/books' )
,(u'Features' , u'http://www.haaretz.com/cmlink/features-print-rss-1.264912?localLinksEnabled=false' ) ,(u'Food & Wine' , PREFIX + u'/print-edition/food-wine' )
,(u'Arts and leisure' , u'http://www.haaretz.com/cmlink/arts-and-leisure-rss-1.286090?localLinksEnabled=false' ) ,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure' )
,(u'Books' , u'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false' ) ,(u'Features' , PREFIX + u'/print-edition/features' )
,(u'Food and Wine' , u'http://www.haaretz.com/cmlink/food-and-wine-print-rss-1.265034?localLinksEnabled=false' )
,(u'Sports' , u'http://www.haaretz.com/cmlink/sports-rss-1.286092?localLinksEnabled=false' )
] ]
def print_version(self, url):
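# Map an article URL to its print page using the trailing article id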
article = url.rpartition('/')[2]
return 'http://www.haaretz.com/misc/article-print-page/' + article
def parse_index(self):
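# Build the issue by scraping each print-edition section page listed in feeds and collecting its article links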
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll(attrs={'class':'text'}):
sp = item.find('span',attrs={'class':'h3 font-weight-normal'})
desc = item.find('p')
description = ''
if sp:
if desc:
description = self.tag_to_string(desc)
link = sp.a
url = self.PREFIX + link['href']
title = self.tag_to_string(link)
times = strftime('%a, %d %b %Y %H:%M:%S +0000',gmtime())
articles.append({
'title' :title
,'date' :times
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
import re import time
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class TheHindu(BasicNewsRecipe): class TheHindu(BasicNewsRecipe):
@ -10,45 +10,41 @@ class TheHindu(BasicNewsRecipe):
language = 'en_IN' language = 'en_IN'
oldest_article = 7 oldest_article = 7
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
remove_tags_before = {'name':'font', 'class':'storyhead'} keep_only_tags = [dict(id='content')]
preprocess_regexps = [ remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
(re.compile(r'<!-- story ends -->.*', re.DOTALL), dict(id=['email-section', 'right-column', 'printfooter'])]
lambda match: '</body></html>'),
] extra_css = '.photo-caption { font-size: smaller }'
extra_css = '''
.storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
'''
feeds = [
(u'Main - Front Page', u'http://www.hindu.com/rss/01hdline.xml'),
(u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
(u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
(u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
(u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
(u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
(u'Main - Weather / Religion / Crossword / Cartoon',
u'http://www.hindu.com/rss/10hdline.xml'),
(u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
(u'Supplement - Literary Review',
u'http://www.hindu.com/rss/lrhdline.xml'),
(u'Supplement - Sunday Magazine',
u'http://www.hindu.com/rss/maghdline.xml'),
(u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
(u'Supplement - Business Review',
u'http://www.hindu.com/rss/bizhdline.xml'),
(u'Supplement - Book Review',
u'http://www.hindu.com/rss/brhdline.xml'),
(u'Supplement - Science & Technology',
u'http://www.hindu.com/rss/setahdline.xml')
]
def postprocess_html(self, soup, first_fetch): def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td','center']): for t in soup.findAll(['table', 'tr', 'td','center']):
t.name = 'div' t.name = 'div'
return soup return soup
def parse_index(self):
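# Walk today's paper index: each h3 starts a new section, each div of class "tpaper" holds an article link (fetched with ?css=print)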
today = time.strftime('%Y-%m-%d')
soup = self.index_to_soup(
'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
div = soup.find(id='left-column')
feeds = []
current_section = None
current_articles = []
for x in div.findAll(['h3', 'div']):
if current_section and x.get('class', '') == 'tpaper':
a = x.find('a', href=True)
if a is not None:
current_articles.append({'url':a['href']+'?css=print',
'title':self.tag_to_string(a), 'date': '',
'description':''})
if x.name == 'h3':
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
if current_section and current_articles:
feeds.append((current_section, current_articles))
return feeds

View File

@ -1,12 +1,15 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import string, pprint
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe): class HoustonChronicle(BasicNewsRecipe):
title = u'The Houston Chronicle' title = u'The Houston Chronicle'
description = 'News from Houston, Texas' description = 'News from Houston, Texas'
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal'
language = 'en' language = 'en'
timefmt = ' [%a, %d %b, %Y]' timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True no_stylesheets = True
@ -38,54 +41,23 @@ class HoustonChronicle(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('http://www.chron.com/news/') categories = ['news', 'sports', 'business', 'entertainment', 'life',
container = soup.find('table', attrs={'class':'body-columns'}) 'travel']
feeds = [] feeds = []
current_section = 'Top Stories' for cat in categories:
current_articles = [] articles = []
soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
self.log('\tFound section:', current_section) for elem in soup.findAll(comptype='story', storyid=True):
a = elem.find('a', href=True)
for div in container.findAll('div'): if a is None: continue
if div.get('class', None) == 'module-mast': url = a['href']
t = self.tag_to_string(div).replace(u'\xbb', '').strip() if not url.startswith('http://'):
if t and 'interactives' not in t: url = 'http://www.chron.com'+url
if current_section and current_articles: articles.append({'title':self.tag_to_string(a), 'url':url,
feeds.append((current_section, current_articles)) 'description':'', 'date':''})
current_section = t pprint.pprint(articles[-1])
current_articles = [] if articles:
self.log('\tFound section:', current_section) feeds.append((string.capwords(cat), articles))
elif div.get('storyid', False):
a = div.find('a', href=True)
if a:
title = self.tag_to_string(a)
url = a.get('href')
if title and url:
if url.startswith('/'):
url = 'http://www.chron.com'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
current_articles.append({'title':title, 'url':url,
'date':'', 'description':''})
elif div.get('class', None) == 'columnbox' and \
'special' in current_section.lower():
a = div.find('a')
if a:
title = self.tag_to_string(a)
url = a.get('href')
if title and url:
if not url.startswith('/'): continue
url = 'http://www.chron.com'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
a.extract()
desc = self.tag_to_string(div)
current_articles.append({'title':title, 'url':url,
'date':'', 'description':desc})
if current_section and current_articles:
feeds.append((current_section, current_articles))
return feeds return feeds

View File

@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277305250(BasicNewsRecipe):
title = u'infzm - China Southern Weekly'
oldest_article = 14
max_articles_per_feed = 100
feeds = [(u'\u5357\u65b9\u5468\u672b-\u70ed\u70b9\u65b0\u95fb', u'http://www.infzm.com/rss/home/rss2.0.xml'),
(u'\u5357\u65b9\u5468\u672b-\u7ecf\u6d4e\u65b0\u95fb', u'http://www.infzm.com/rss/economic.xml'),
(u'\u5357\u65b9\u5468\u672b-\u6587\u5316\u65b0\u95fb', u'http://www.infzm.com/rss/culture.xml'),
(u'\u5357\u65b9\u5468\u672b-\u751f\u6d3b\u65f6\u5c1a', u'http://www.infzm.com/rss/lifestyle.xml'),
(u'\u5357\u65b9\u5468\u672b-\u89c2\u70b9', u'http://www.infzm.com/rss/opinion.xml')
]
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'http://www.infzm.com'
description = 'Chinese Weekly Tabloid'
category = 'News, China'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
#encoding = 'GB2312'
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://i50.tinypic.com/2qmfb7l.jpg'
extra_css = '''
@font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {
margin-right: 8pt;
font-family: 'DroidFont', serif;}
.detailContent {font-family: 'DroidFont', serif, sans-serif}
'''
keep_only_tags = [
dict(name='div', attrs={'id':'detailContent'}),
]
remove_tags = [
dict(name='div', attrs={'id':['detailTools', 'detailSideL', 'pageNum']}),
]
remove_tags_after = [
dict(name='div', attrs={'id':'pageNum'}),
]
def preprocess_html(self, soup):
for item in soup.findAll(color=True):
del item['color']
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class LondonFreePress(BasicNewsRecipe):
title = u'London Free Press'
__author__ = 'rty'
oldest_article = 4
max_articles_per_feed = 100
publisher = 'lfpress.com'
description = 'Ontario Canada Newspaper'
category = 'News, Ontario, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_CA'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
feeds = [
(u'News', u'http://www.lfpress.com/news/rss.xml'),
(u'Comment', u'http://www.lfpress.com/comment/rss.xml'),
(u'Entertainment', u'http://www.lfpress.com/entertainment/rss.xml'),
(u'Money', u'http://www.lfpress.com/money/rss.xml'),
(u'Life', u'http://www.lfpress.com/life/rss.xml'),
(u'Sports', u'http://www.lfpress.com/sports/rss.xml')
]
keep_only_tags = [
dict(name='div', attrs={'id':'article'}),
]
remove_tags = [
dict(name='div', attrs={'id':'commentsBottom'}),
dict(name='div', attrs={'class':['leftBox','bottomBox clear']}),
dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bottomBox clear'}),
]

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.vatican.va/news_services/or/or_quo
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LOsservatoreRomano_it(BasicNewsRecipe):
title = "L'Osservatore Romano"
__author__ = 'Darko Miletic'
description = 'Giornale quotidiano, politico, religioso del Vaticano'
publisher = 'La Santa Sede'
category = 'news, politics, religion, Vatican'
no_stylesheets = True
INDEX = 'http://www.vatican.va'
FEEDPAGE = INDEX + '/news_services/or/or_quo/index.html'
CONTENTPAGE = INDEX + '/news_services/or/or_quo/text.html'
use_embedded_content = False
encoding = 'cp1252'
language = 'it'
publication_type = 'newspaper'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
def parse_index(self):
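# The whole issue is served as a single text page, so the index is one entry pointing at CONTENTPAGE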
articles = []
articles.append({
'title' :self.title
,'date' :''
,'url' :self.CONTENTPAGE
,'description':''
})
return [(self.title, articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
lrb.co.uk lrb.co.uk
''' '''
@ -8,17 +8,20 @@ lrb.co.uk
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooks(BasicNewsRecipe): class LondonReviewOfBooks(BasicNewsRecipe):
title = u'London Review of Books' title = 'London Review of Books (free)'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, England' category = 'news, literature, UK'
publisher = 'London Review of Books' publisher = 'LRB ltd.'
oldest_article = 7 oldest_article = 15
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'en_GB' language = 'en_GB'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
publication_type = 'magazine'
masthead_url = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
@ -27,13 +30,16 @@ class LondonReviewOfBooks(BasicNewsRecipe):
,'publisher' : publisher ,'publisher' : publisher
} }
keep_only_tags = [dict(name='div' , attrs={'id' :'main'})] keep_only_tags = [dict(attrs={'class':['article-body indent','letters','article-list']})]
remove_tags = [ remove_attributes = ['width','height']
dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
,dict(name='div' , attrs={'id' :['mainmenu','precontent','otherarticles'] })
,dict(name='span', attrs={'class':['inlineright','article-icons']})
,dict(name='ul' , attrs={'class':'article-controls'})
,dict(name='p' , attrs={'class':'meta-info' })
]
feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')] feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup('http://www.lrb.co.uk/')
cover_item = soup.find('p',attrs={'class':'cover'})
if cover_item:
cover_url = 'http://www.lrb.co.uk' + cover_item.a.img['src']
return cover_url

View File

@ -0,0 +1,75 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lrb.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooksPayed(BasicNewsRecipe):
title = 'London Review of Books'
__author__ = 'Darko Miletic'
description = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, UK'
publisher = 'LRB Ltd.'
max_articles_per_feed = 100
language = 'en_GB'
no_stylesheets = True
delay = 1
use_embedded_content = False
encoding = 'utf-8'
INDEX = 'http://www.lrb.co.uk'
LOGIN = INDEX + '/login'
masthead_url = INDEX + '/assets/images/lrb_logo_big.gif'
needs_subscription = True
publication_type = 'magazine'
extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
def get_browser(self):
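# Log in with the subscriber credentials before any content is fetched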
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(nr=1)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def parse_index(self):
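# Follow the cover link on the home page to the current issue and list every article in its table of contents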
articles = []
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('p',attrs={'class':'cover'})
lrbtitle = self.title
if cover_item:
self.cover_url = self.INDEX + cover_item.a.img['src']
content = self.INDEX + cover_item.a['href']
soup2 = self.index_to_soup(content)
sitem = soup2.find(attrs={'class':'article-list'})
lrbtitle = soup2.head.title.string
for item in sitem.findAll('a',attrs={'class':'title'}):
description = u''
title_prefix = u''
feed_link = item
if feed_link.has_key('href'):
url = self.INDEX + feed_link['href']
title = title_prefix + self.tag_to_string(feed_link)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
return [(lrbtitle, articles)]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})]
remove_attributes = ['width','height']

View File

@ -0,0 +1,43 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1276930924(BasicNewsRecipe):
title = u'Maximum PC'
__author__ = 'rty'
description = 'Maximum PC'
publisher = 'http://www.maximumpc.com'
category = 'news, computer, technology'
language = 'en'
oldest_article = 30
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
temp_files = []
articles_are_obfuscated = True
feeds = [(u'News', u'http://www.maximumpc.com/articles/4/feed'),
(u'Reviews', u'http://www.maximumpc.com/articles/40/feed'),
(u'Editors Blog', u'http://www.maximumpc.com/articles/6/feed'),
(u'How-to', u'http://www.maximumpc.com/articles/32/feed'),
(u'Features', u'http://www.maximumpc.com/articles/31/feed'),
(u'From the Magazine', u'http://www.maximumpc.com/articles/72/feed')
]
keep_only_tags = [
dict(name='div', attrs={'class':['print-title','article_body']}),
]
remove_tags = [
dict(name='div', attrs={'class':'comments-tags-actions'}),
]
remove_tags_before = dict(name='div', attrs={'class':'print-title'})
remove_tags_after = dict(name='div', attrs={'class':'meta-content'})
def get_obfuscated_article(self, url):
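# Download the print version of each article (the /print/ link) into a temp file for calibre to parse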
br = self.get_browser()
br.open(url)
response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
html = response.read()
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name

View File

@ -7,18 +7,18 @@ class NYTimes(BasicNewsRecipe):
__author__ = 'Krittika Goyal' __author__ = 'Krittika Goyal'
description = 'Canadian national newspaper' description = 'Canadian national newspaper'
timefmt = ' [%d %b, %Y]' timefmt = ' [%d %b, %Y]'
needs_subscription = False
language = 'en_CA' language = 'en_CA'
needs_subscription = False
no_stylesheets = True no_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'}) #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'}) remove_tags_after = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name='iframe'),
dict(name='div', attrs={'class':'story-tools'}), dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
#dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}), #dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
#dict(name='form', attrs={'onsubmit':''}), #dict(name='form', attrs={'onsubmit':''}),
#dict(name='table', attrs={'cellspacing':'0'}), dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
] ]
# def preprocess_html(self, soup): # def preprocess_html(self, soup):
@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.nejm_get_index() soup = self.nejm_get_index()
div = soup.find(id='LegoText4') div = soup.find(id='npContentMain')
current_section = None current_section = None
current_articles = [] current_articles = []
@ -50,7 +50,7 @@ class NYTimes(BasicNewsRecipe):
current_section = self.tag_to_string(x) current_section = self.tag_to_string(x)
current_articles = [] current_articles = []
self.log('\tFound section:', current_section) self.log('\tFound section:', current_section)
if current_section is not None and x.name == 'h3': if current_section is not None and x.name == 'h5':
# Article found # Article found
title = self.tag_to_string(x) title = self.tag_to_string(x)
a = x.find('a', href=lambda x: x and 'story' in x) a = x.find('a', href=lambda x: x and 'story' in x)
@ -59,8 +59,8 @@ class NYTimes(BasicNewsRecipe):
url = a.get('href', False) url = a.get('href', False)
if not url or not title: if not url or not title:
continue continue
if url.startswith('story'): #if url.startswith('story'):
url = 'http://www.nationalpost.com/todays-paper/'+url url = 'http://www.nationalpost.com/todays-paper/'+url
self.log('\t\tFound article:', title) self.log('\t\tFound article:', title)
self.log('\t\t\t', url) self.log('\t\t\t', url)
current_articles.append({'title': title, 'url':url, current_articles.append({'title': title, 'url':url,
@ -70,28 +70,11 @@ class NYTimes(BasicNewsRecipe):
feeds.append((current_section, current_articles)) feeds.append((current_section, current_articles))
return feeds return feeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'class':'triline'}) story = soup.find(name='div', attrs={'id':'npContentMain'})
page2_link = soup.find('p','pagenav') ##td = heading.findParent(name='td')
if page2_link: ##td.extract()
atag = page2_link.find('a',href=True)
if atag:
page2_url = atag['href']
if page2_url.startswith('story'):
page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
elif page2_url.startswith( '/todays-paper/story.html'):
page2_url = 'http://www.nationalpost.com/'+page2_url
page2_soup = self.index_to_soup(page2_url)
if page2_soup:
page2_content = page2_soup.find('div','story-content')
if page2_content:
full_story = BeautifulSoup('<div></div>')
full_story.insert(0,story)
full_story.insert(1,page2_content)
story = full_story
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>') soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body') body = soup.find(name='body')
body.insert(0, story) body.insert(0, story)
return soup return soup

View File

@ -32,15 +32,16 @@ class NewScientist(BasicNewsRecipe):
} }
preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')] preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]
keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})] keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
remove_tags = [ remove_tags = [
dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]}) dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools']}) ,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools','comments','blgsocial']})
,dict(name='p' , attrs={'class':['marker','infotext' ]}) ,dict(name='p' , attrs={'class':['marker','infotext' ]})
,dict(name='meta' , attrs={'name' :'description' }) ,dict(name='meta' , attrs={'name' :'description' })
,dict(name='a' , attrs={'rel' :'tag' })
] ]
remove_tags_after = dict(attrs={'class':'nbpcopy'}) remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
remove_attributes = ['height','width'] remove_attributes = ['height','width']
feeds = [ feeds = [

View File

@ -17,6 +17,7 @@ class NYTimes(BasicNewsRecipe):
title = 'New York Times Top Stories' title = 'New York Times Top Stories'
__author__ = 'GRiker' __author__ = 'GRiker'
language = 'en' language = 'en'
requires_version = (0, 7, 5)
description = 'Top Stories from the New York Times' description = 'Top Stories from the New York Times'
# List of sections typically included in Top Stories. Use a keyword from the # List of sections typically included in Top Stories. Use a keyword from the
@ -64,6 +65,7 @@ class NYTimes(BasicNewsRecipe):
timefmt = '' timefmt = ''
needs_subscription = True needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article') remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article') remove_tags_after = dict(id='article')
@ -77,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule', 'doubleRule',
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module',
'icon enlargeThis', 'icon enlargeThis',
'leftNavTabs', 'leftNavTabs',
'module box nav', 'module box nav',
@ -86,6 +89,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule', 'relatedSearchesModule',
'side_tool', 'side_tool',
'singleAd', 'singleAd',
'subNavigation clearfix',
'subNavigation tabContent active', 'subNavigation tabContent active',
'subNavigation tabContent active clearfix', 'subNavigation tabContent active clearfix',
]}), ]}),
@ -108,6 +112,7 @@ class NYTimes(BasicNewsRecipe):
'navigation', 'navigation',
'portfolioInline', 'portfolioInline',
'relatedArticles', 'relatedArticles',
'respond',
'side_search', 'side_search',
'side_index', 'side_index',
'side_tool', 'side_tool',
@ -183,6 +188,16 @@ class NYTimes(BasicNewsRecipe):
self.log("\nFailed to login") self.log("\nFailed to login")
return br return br
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
return self.index_to_soup(url, raw=True)
def get_cover_url(self): def get_cover_url(self):
cover = None cover = None
st = time.localtime() st = time.localtime()
@ -391,14 +406,6 @@ class NYTimes(BasicNewsRecipe):
return ans return ans
def preprocess_html(self, soup): def preprocess_html(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url)
return self.strip_anchors(soup) return self.strip_anchors(soup)
def postprocess_html(self,soup, True): def postprocess_html(self,soup, True):
@ -454,8 +461,10 @@ class NYTimes(BasicNewsRecipe):
if mp_off >= 0: if mp_off >= 0:
c = c[:mp_off] c = c[:mp_off]
emTag.insert(0, c) emTag.insert(0, c)
hrTag = Tag(soup, 'hr') #hrTag = Tag(soup, 'hr')
#hrTag['style'] = "margin-top:0em;margin-bottom:0em" #hrTag['class'] = 'caption_divider'
hrTag = Tag(soup, 'div')
hrTag['class'] = 'divider'
emTag.insert(1, hrTag) emTag.insert(1, hrTag)
caption.replaceWith(emTag) caption.replaceWith(emTag)

View File

@ -13,13 +13,14 @@ Story
import re, string, time import re, string, time
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag
class NYTimes(BasicNewsRecipe): class NYTimes(BasicNewsRecipe):
title = 'The New York Times' title = 'The New York Times'
__author__ = 'GRiker' __author__ = 'GRiker'
language = 'en' language = 'en'
requires_version = (0, 7, 5)
description = 'Daily news from the New York Times (subscription version)' description = 'Daily news from the New York Times (subscription version)'
allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials', allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
@ -65,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule', 'doubleRule',
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module',
'icon enlargeThis', 'icon enlargeThis',
'leftNavTabs', 'leftNavTabs',
'module box nav', 'module box nav',
@ -74,6 +76,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule', 'relatedSearchesModule',
'side_tool', 'side_tool',
'singleAd', 'singleAd',
'subNavigation clearfix',
'subNavigation tabContent active', 'subNavigation tabContent active',
'subNavigation tabContent active clearfix', 'subNavigation tabContent active clearfix',
]}), ]}),
@ -96,6 +99,7 @@ class NYTimes(BasicNewsRecipe):
'navigation', 'navigation',
'portfolioInline', 'portfolioInline',
'relatedArticles', 'relatedArticles',
'respond',
'side_search', 'side_search',
'side_index', 'side_index',
'side_tool', 'side_tool',
@ -103,6 +107,7 @@ class NYTimes(BasicNewsRecipe):
]), ]),
dict(name=['script', 'noscript', 'style'])] dict(name=['script', 'noscript', 'style'])]
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
no_stylesheets = True no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \ extra_css = '.headline {text-align: left;}\n \
.byline {font-family: monospace; \ .byline {font-family: monospace; \
@ -158,7 +163,7 @@ class NYTimes(BasicNewsRecipe):
return cover return cover
def get_masthead_title(self): def get_masthead_title(self):
return 'NYTimes GR Version' return self.title
def dump_ans(self, ans): def dump_ans(self, ans):
total_article_count = 0 total_article_count = 0
@ -279,15 +284,17 @@ class NYTimes(BasicNewsRecipe):
self.dump_ans(ans) self.dump_ans(ans)
return ans return ans
def preprocess_html(self, soup): def skip_ad_pages(self, soup):
# Skip ad pages served before actual article # Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'}) skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None: if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href']) self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all' url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url) self.log.warn("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url) return self.index_to_soup(url, raw=True)
def preprocess_html(self, soup):
return self.strip_anchors(soup) return self.strip_anchors(soup)
def postprocess_html(self,soup, True): def postprocess_html(self,soup, True):
@ -329,7 +336,7 @@ class NYTimes(BasicNewsRecipe):
self.log(">>> No class:'columnGroup first' found <<<") self.log(">>> No class:'columnGroup first' found <<<")
# Change class="kicker" to <h3> # Change class="kicker" to <h3>
kicker = soup.find(True, {'class':'kicker'}) kicker = soup.find(True, {'class':'kicker'})
if kicker and kicker.contents[0]: if kicker and kicker.contents and kicker.contents[0]:
h3Tag = Tag(soup, "h3") h3Tag = Tag(soup, "h3")
h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker, h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
use_alt=False))) use_alt=False)))
@ -344,8 +351,10 @@ class NYTimes(BasicNewsRecipe):
if mp_off >= 0: if mp_off >= 0:
c = c[:mp_off] c = c[:mp_off]
emTag.insert(0, c) emTag.insert(0, c)
hrTag = Tag(soup, 'hr') #hrTag = Tag(soup, 'hr')
#hrTag['style'] = "margin-top:0em;margin-bottom:0em" #hrTag['class'] = 'caption_divider'
hrTag = Tag(soup, 'div')
hrTag['class'] = 'divider'
emTag.insert(1, hrTag) emTag.insert(1, hrTag)
caption.replaceWith(emTag) caption.replaceWith(emTag)
@ -413,12 +422,11 @@ class NYTimes(BasicNewsRecipe):
return soup return soup
def postprocess_book(self, oeb, opts, log) : def populate_article_metadata(self,article,soup,first):
print "\npostprocess_book()\n" '''
Extract author and description from article, add to article metadata
def extract_byline(href) : '''
# <meta name="byline" content= def extract_author(soup):
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
byline = soup.find('meta',attrs={'name':['byl','CLMST']}) byline = soup.find('meta',attrs={'name':['byl','CLMST']})
if byline : if byline :
author = byline['content'] author = byline['content']
@ -428,50 +436,34 @@ class NYTimes(BasicNewsRecipe):
if byline: if byline:
author = byline.renderContents() author = byline.renderContents()
else: else:
print "couldn't find byline in %s" % href
print soup.prettify() print soup.prettify()
return None return None
# Kill commas - Kindle switches to '&' return author
return re.sub(',','',author)
def extract_description(href) : def extract_description(soup):
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
description = soup.find('meta',attrs={'name':['description','description ']}) description = soup.find('meta',attrs={'name':['description','description ']})
if description : if description :
# print repr(description['content'])
# print self.massageNCXText(description['content'])
return self.massageNCXText(description['content']) return self.massageNCXText(description['content'])
else: else:
# Take first paragraph of article # Take first paragraph of article
articleBody = soup.find('div',attrs={'id':'articleBody'}) articlebody = soup.find('div',attrs={'id':'articlebody'})
if not articleBody: if not articlebody:
# Try again with class instead of id # Try again with class instead of id
articleBody = soup.find('div',attrs={'class':'articleBody'}) articlebody = soup.find('div',attrs={'class':'articlebody'})
if not articleBody: if not articlebody:
print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:' print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
print soup.prettify() print soup.prettify()
return None return None
paras = articleBody.findAll('p') paras = articlebody.findAll('p')
for p in paras: for p in paras:
if p.renderContents() > '' : if p.renderContents() > '' :
return self.massageNCXText(self.tag_to_string(p,use_alt=False)) return self.massageNCXText(self.tag_to_string(p,use_alt=False))
return None return None
# Method entry point here if not article.author:
# Single section toc looks different than multi-section tocs article.author = extract_author(soup)
if oeb.toc.depth() == 2 : if not article.summary:
for article in oeb.toc : article.summary = article.text_summary = extract_description(soup)
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href).decode('utf-8')
elif oeb.toc.depth() == 3 :
for section in oeb.toc :
for article in section :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)
def strip_anchors(self,soup): def strip_anchors(self,soup):
paras = soup.findAll(True) paras = soup.findAll(True)

View File

@ -28,7 +28,7 @@ class OldNewThing(BasicNewsRecipe):
} }
remove_attributes = ['width','height'] remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':['postsub','comment']})] keep_only_tags = [dict(attrs={'class':'full-post'})]
remove_tags = [dict(attrs={'class':['post-attributes','post-tags','post-actions']})]
feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')] feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')]

View File

@ -0,0 +1,57 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277129332(BasicNewsRecipe):
title = u'People Daily - China'
oldest_article = 2
max_articles_per_feed = 100
__author__ = 'rty'
publisher = 'people.com.cn'
description = 'People Daily Newspaper'
language = 'zh'
category = 'News, China'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'GB2312'
conversion_options = {'linearize_tables':True}
feeds = [(u'\u56fd\u5185\u65b0\u95fb', u'http://www.people.com.cn/rss/politics.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.people.com.cn/rss/world.xml'),
(u'\u7ecf\u6d4e\u65b0\u95fb', u'http://www.people.com.cn/rss/finance.xml'),
(u'\u4f53\u80b2\u65b0\u95fb', u'http://www.people.com.cn/rss/sports.xml'),
(u'\u53f0\u6e7e\u65b0\u95fb', u'http://www.people.com.cn/rss/haixia.xml')]
keep_only_tags = [
dict(name='div', attrs={'class':'left_content'}),
]
remove_tags = [
dict(name='table', attrs={'class':'title'}),
]
remove_tags_after = [
dict(name='table', attrs={'class':'bianji'}),
]
def append_page(self, soup, appendtag, position):
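# Stitch multi-page articles together by following the "next page" pager and appending each page's body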
pager = soup.find('img',attrs={'src':'/img/next_b.gif'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':'left_content'})
#for it in texttag.findAll(style=True):
# del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="utf-8" />'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
self.append_page(soup, soup.body, 3)
#pager = soup.find('a',attrs={'class':'ab12'})
#if pager:
# pager.extract()
return soup

View File

@ -1,39 +1,44 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class PsychologyToday(BasicNewsRecipe): class AdvancedUserRecipe1275708473(BasicNewsRecipe):
title = u'Psychology Today' title = u'Psychology Today'
language = 'en' __author__ = 'rty'
__author__ = 'Krittika Goyal' publisher = u'www.psychologytoday.com'
oldest_article = 1 #days category = u'Psychology'
max_articles_per_feed = 25 max_articles_per_feed = 100
#encoding = 'latin1' remove_javascript = True
use_embedded_content = False
remove_stylesheets = True no_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'}) language = 'en'
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'}) temp_files = []
articles_are_obfuscated = True
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name='div', attrs={'class':['print-source_url','field-items','print-footer']}),
dict(name='div', attrs={'class':['pt-box-title', 'pt-box-content', 'blog-entry-footer', 'item-list', 'article-sub-meta']}), dict(name='span', attrs={'class':'print-footnote'}),
dict(name='div', attrs={'id':['block-td_search_160', 'block-cam_search_160']}), ]
#dict(name='ul', attrs={'class':'article-tools'}), remove_tags_before = dict(name='h1', attrs={'class':'print-title'})
#dict(name='ul', attrs={'class':'articleTools'}), remove_tags_after = dict(name='div', attrs={'class':['field-items','print-footer']})
]
feeds = [ feeds = [(u'Contents', u'http://www.psychologytoday.com/articles/index.rss')]
('PSY TODAY',
'http://www.psychologytoday.com/articles/index.rss'),
]
def preprocess_html(self, soup): def get_article_url(self, article):
story = soup.find(name='div', attrs={'id':'contentColumn'}) return article.get('link', None)
#td = heading.findParent(name='td')
#td.extract() def get_obfuscated_article(self, url):
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>') br = self.get_browser()
body = soup.find(name='body') br.open(url)
body.insert(0, story) response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
for x in soup.findAll(name='p', text=lambda x:x and '--&gt;' in x): html = response.read()
p = x.findParent('p') self.temp_files.append(PersistentTemporaryFile('_fa.html'))
if p is not None: self.temp_files[-1].write(html)
p.extract() self.temp_files[-1].close()
return soup return self.temp_files[-1].name
def get_cover_url(self):
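# Use the magazine-cover image from the magazine index page as the cover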
index = 'http://www.psychologytoday.com/magazine/'
soup = self.index_to_soup(index)
for image in soup.findAll('img',{ "class" : "imagefield imagefield-field_magazine_cover" }):
return image['src'] + '.jpg'
return None

View File

@ -0,0 +1,79 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1278063072(BasicNewsRecipe):
title = u'Singtao Daily - Canada'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'rty'
description = 'Toronto Canada Chinese Newspaper'
publisher = 'news.singtao.ca'
category = 'Chinese, News, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://news.singtao.ca/i/site_2009/logo.jpg'
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\
body {text-align: justify; margin-right: 8pt; font-family: 'DroidFont', serif;}\
h1 {font-family: 'DroidFont', serif;}\
.articledescription {font-family: 'DroidFont', serif;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':['title','storybody']}),
dict(name='div', attrs={'class':'content'})
]
def parse_index(self):
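# Build the sections by scraping the fixed list of section index pages below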
feeds = []
for title, url in [
('Editorial',
'http://news.singtao.ca/toronto/editorial.html'),
('Toronto \xe5\x9f\x8e\xe5\xb8\x82/\xe7\xa4\xbe\xe5\x8d\x80'.decode('utf-8'),
'http://news.singtao.ca/toronto/city.html'),
('Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8'),
'http://news.singtao.ca/toronto/canada.html'),
('Entertainment',
'http://news.singtao.ca/toronto/entertainment.html'),
('World',
'http://news.singtao.ca/toronto/world.html'),
('Finance \xe5\x9c\x8b\xe9\x9a\x9b\xe8\xb2\xa1\xe7\xb6\x93'.decode('utf-8'),
'http://news.singtao.ca/toronto/finance.html'),
('Sports', 'http://news.singtao.ca/toronto/sports.html'),
]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
return feeds
def parse_section(self, url):
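# Collect the article links from a section's news list, making relative URLs absolute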
soup = self.index_to_soup(url)
div = soup.find(attrs={'class': ['newslist paddingL10T10','newslist3 paddingL10T10']})
#date = div.find(attrs={'class': 'underlineBLK'})
current_articles = []
for li in div.findAll('li'):
a = li.find('a', href = True)
if a is None:
continue
title = self.tag_to_string(a)
url = a.get('href', False)
if not url or not title:
continue
if url.startswith('/'):
url = 'http://news.singtao.ca'+url
# self.log('\ \ Found article:', title)
# self.log('\ \ \ ', url)
current_articles.append({'title': title, 'url': url, 'description':''})
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278049615(BasicNewsRecipe):
title = u'Statesman'
publisher = 'http://www.statesman.com/'
description = 'Austin Texas Daily Newspaper'
category = 'News, Austin, Texas'
__author__ = 'rty'
oldest_article = 3
max_articles_per_feed = 100
feeds = [(u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
(u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
(u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
(u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
(u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
]
masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
#temp_files = []
#articles_are_obfuscated = True
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
remove_tags = [
dict(name='div', attrs={'id':'cxArticleOptions'}),
]
keep_only_tags = [
dict(name='div', attrs={'class':'cxArticleHeader'}),
dict(name='div', attrs={'id':'cxArticleBodyText'}),
]

View File

@ -0,0 +1,59 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1276486274(BasicNewsRecipe):
title = u'Today Online - Singapore'
publisher = 'MediaCorp Press Ltd - Singapore'
__author__ = 'rty'
category = 'news, Singapore'
oldest_article = 7
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_SG'
temp_files = []
articles_are_obfuscated = True
masthead_url = 'http://www.todayonline.com/App_Themes/Default/images/icons/TodayOnlineLogo.gif'
conversion_options = {'linearize_tables':True}
extra_css = '''
.author{font-style: italic; font-size: small}
.date{font-style: italic; font-size: small}
.Headline{font-weight: bold; font-size: xx-large}
.headerStrap{font-weight: bold; font-size: x-large; font-style: italic}
.bodyText{font-size: 4px;font-family: Times New Roman;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':['fullPrintBodyHolder']})
]
remove_tags_after = [ dict(name='div', attrs={'class':'button'})]
remove_tags = [
dict(name='div', attrs={'class':['url','button']})
]
feeds = [
(u'Singapore', u'http://www.todayonline.com/RSS/Singapore'),
(u'Hot News', u'http://www.todayonline.com/RSS/Hotnews'),
(u'Today Online', u'http://www.todayonline.com/RSS/Todayonline'),
(u'Voices', u'http://www.todayonline.com/RSS/Voices'),
(u'Commentary', u'http://www.todayonline.com/RSS/Commentary'),
(u'World', u'http://www.todayonline.com/RSS/World'),
(u'Business', u'http://www.todayonline.com/RSS/Business'),
(u'Column', u'http://www.todayonline.com/RSS/Columns'),
]
def get_obfuscated_article(self, url):
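# Fetch the print-friendly copy of each article (the /Print/ link) and cache it in a temp file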
br = self.get_browser()
br.open(url)
response = br.follow_link(url_regex = r'/Print/', nr = 0)
html = response.read()
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,21 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class TimesOfIndia(BasicNewsRecipe): class TimesOfIndia(BasicNewsRecipe):
title = u'Times of India' title = u'Times of India'
language = 'en_IN' language = 'en_IN'
__author__ = 'Krittika Goyal' __author__ = 'Kovid Goyal'
oldest_article = 1 #days oldest_article = 1 #days
max_articles_per_feed = 25 max_articles_per_feed = 25
remove_stylesheets = True no_stylesheets = True
keep_only_tags = [dict(attrs={'class':'prttabl'})]
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(style=lambda x: x and 'float' in x)
dict(name='td', attrs={'class':'newptool1'}),
dict(name='div', attrs={'id':'newptool'}),
dict(name='ul', attrs={'class':'newtabcontent_tabs_new'}),
dict(name='b', text='Topics'),
dict(name='span', text=':'),
] ]
feeds = [ feeds = [
@ -42,13 +37,8 @@ class TimesOfIndia(BasicNewsRecipe):
('Most Read', ('Most Read',
'http://timesofindia.indiatimes.com/rssfeedmostread.cms') 'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
] ]
def print_version(self, url):
return url + '?prtpage=1'
def preprocess_html(self, soup): def preprocess_html(self, soup):
heading = soup.find(name='h1', attrs={'class':'heading'})
td = heading.findParent(name='td')
td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, td)
td.name = 'div'
return soup return soup

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277647803(BasicNewsRecipe):
title = u'Winnipeg Sun'
__author__ = 'rty'
__version__ = '1.0'
oldest_article = 2
publisher = 'www.winnipegsun.com'
description = 'Winnipeg Newspaper'
category = 'News, Winnipeg, Canada'
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'UTF-8'
remove_javascript = True
use_embedded_content = False
language = 'en_CA'
feeds = [
(u'News', u'http://www.winnipegsun.com/news/rss.xml'),
(u'Columnists', u'http://www.winnipegsun.com/columnists/rss.xml'),
(u'Editorial', u'http://www.winnipegsun.com/comment/editorial/rss.xml'),
(u'Entertainments', u'http://www.winnipegsun.com/entertainment/rss.xml'),
(u'Life', u'http://www.winnipegsun.com/life/rss.xml'),
(u'Money', u'http://www.winnipegsun.com/money/rss.xml')
]
keep_only_tags = [
dict(name='div', attrs={'id':'article'}),
]
remove_tags = [
dict(name='div', attrs={'class':['leftBox','bottomBox clear']}),
dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
dict(name='div', attrs={'id':'commentsBottom'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bottomBox clear'})
]

View File

@ -3,15 +3,15 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import string
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import copy
# http://online.wsj.com/page/us_in_todays_paper.html # http://online.wsj.com/page/us_in_todays_paper.html
class WallStreetJournal(BasicNewsRecipe): class WallStreetJournal(BasicNewsRecipe):
title = 'The Wall Street Journal (US)' title = 'The Wall Street Journal'
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal, Sujata Raman, and Joshua Oster-Morris'
description = 'News and current affairs' description = 'News and current affairs'
needs_subscription = True needs_subscription = True
language = 'en' language = 'en'
@ -66,7 +66,17 @@ class WallStreetJournal(BasicNewsRecipe):
return soup return soup
def wsj_get_index(self): def wsj_get_index(self):
return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html') return self.index_to_soup('http://online.wsj.com/itp')
def wsj_add_feed(self,feeds,title,url):
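# Fetch one section and append its articles; the What's News pages need their own scraper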
self.log('Found section:', title)
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
if articles:
feeds.append((title, articles))
return feeds
def parse_index(self): def parse_index(self):
soup = self.wsj_get_index() soup = self.wsj_get_index()
@ -75,24 +85,72 @@ class WallStreetJournal(BasicNewsRecipe):
if date is not None: if date is not None:
self.timefmt = ' [%s]'%self.tag_to_string(date) self.timefmt = ' [%s]'%self.tag_to_string(date)
sections = {} cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
sec_order = [] if cov is not None:
self.cover_url = cov['href']
feeds = []
div = soup.find('div', attrs={'class':'itpHeader'})
div = div.find('ul', attrs={'class':'tab'})
for a in div.findAll('a', href=lambda x: x and '/itp/' in x):
pageone = a['href'].endswith('pageone')
if pageone:
title = 'Front Section'
url = 'http://online.wsj.com' + a['href']
feeds = self.wsj_add_feed(feeds,title,url)
title = "What's News"
url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url)
else:
title = self.tag_to_string(a)
url = 'http://online.wsj.com' + a['href']
feeds = self.wsj_add_feed(feeds,title,url)
return feeds
def wsj_find_wn_articles(self, url):
soup = self.index_to_soup(url)
articles = []
whats_news = soup.find('div', attrs={'class':lambda x: x and 'whatsNews-simple' in x})
if whats_news is not None:
for a in whats_news.findAll('a', href=lambda x: x and '/article/' in x):
container = a.findParent(['p'])
meta = a.find(attrs={'class':'meta_sectionName'})
if meta is not None:
meta.extract()
title = self.tag_to_string(a).strip()
url = a['href']
desc = ''
if container is not None:
desc = self.tag_to_string(container)
articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
self.log('\tFound WN article:', title)
return articles
def wsj_find_articles(self, url):
soup = self.index_to_soup(url)
whats_news = soup.find('div', attrs={'class':lambda x: x and 'whatsNews-simple' in x})
if whats_news is not None:
whats_news.extract()
articles = []
flavorarea = soup.find('div', attrs={'class':lambda x: x and 'ahed' in x})
if flavorarea is not None:
flavorstory = flavorarea.find('a', href=lambda x: x and x.startswith('/article'))
if flavorstory is not None:
flavorstory['class'] = 'mjLinkItem'
metapage = soup.find('span', attrs={'class':lambda x: x and 'meta_sectionName' in x})
if metapage is not None:
flavorstory.append( copy.copy(metapage) ) #metapage should always be A1 because that should be first on the page
for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True): for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True):
container = a.findParent(['li', 'div']) container = a.findParent(['li', 'div'])
if container.name == 'div':
section = 'Page One'
else:
section = ''
sec = container.find('a', href=lambda x: x and '/search?' in x)
if sec is not None:
section = self.tag_to_string(sec).strip()
if not section:
h = container.find(['h1','h2','h3','h4','h5','h6'])
section = self.tag_to_string(h)
section = string.capitalize(section).replace('U.s.', 'U.S.')
if section not in sections:
sections[section] = []
sec_order.append(section)
meta = a.find(attrs={'class':'meta_sectionName'}) meta = a.find(attrs={'class':'meta_sectionName'})
if meta is not None: if meta is not None:
meta.extract() meta.extract()
@ -103,30 +161,14 @@ class WallStreetJournal(BasicNewsRecipe):
if p is not None: if p is not None:
desc = self.tag_to_string(p) desc = self.tag_to_string(p)
sections[section].append({'title':title, 'url':url, articles.append({'title':title, 'url':url,
'description':desc, 'date':''}) 'description':desc, 'date':''})
self.log('Found article:', title) self.log('\tFound article:', title)
a.extract() return articles
for a in container.findAll('a', href=lambda x: x and '/article/'
in x):
url = a['href']
if not url.startswith('http:'):
url = 'http://online.wsj.com'+url
title = self.tag_to_string(a).strip()
if not title or title.startswith('['): continue
if title:
sections[section].append({'title':self.tag_to_string(a),
'url':url, 'description':'', 'date':''})
self.log('\tFound related:', title)
feeds = [(sec, sections[sec]) for sec in sec_order]
return feeds
def cleanup(self): def cleanup(self):
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com') self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
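Note on the reworked index logic above: wsj_add_feed() accumulates (section title, article list) pairs, which is the structure parse_index() must hand back to calibre. A minimal sketch of that return value, with purely illustrative titles and URLs:
feeds = [
    ('Front Section', [
        {'title': 'Example headline',       # illustrative values only
         'url': 'http://online.wsj.com/article/SB0000000000.html',
         'description': 'Summary text taken from the containing paragraph',
         'date': ''},
    ]),
]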
View File
@ -15,22 +15,22 @@ class ZAOBAO(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
recursions = 1 recursions = 1
language = 'zh' language = 'zh'
encoding = 'gbk' encoding = 'gbk'
# multithreaded_fetch = True # multithreaded_fetch = True
keep_only_tags = [ keep_only_tags = [
dict(name='table', attrs={'cellpadding':'9'}), dict(name='td', attrs={'class':'text'}),
dict(name='table', attrs={'class':'cont'}),
dict(name='div', attrs={'id':'content'}),
dict(name='span', attrs={'class':'page'}), dict(name='span', attrs={'class':'page'}),
dict(name='div', attrs={'id':'content'})
] ]
remove_tags = [ remove_tags = [
dict(name='table', attrs={'cellspacing':'9'}), dict(name='table', attrs={'cellspacing':'9'}),
dict(name='fieldset'),
dict(name='div', attrs={'width':'30%'}),
] ]
extra_css = '\ extra_css = '\n\
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n\ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n\
body{font-family: serif1, serif}\n\ body{font-family: serif1, serif}\n\
.article_description{font-family: serif1, serif}\n\ .article_description{font-family: serif1, serif}\n\
@ -41,7 +41,10 @@ class ZAOBAO(BasicNewsRecipe):
.article {font-size:medium}\n\ .article {font-size:medium}\n\
.navbar {font-size: small}\n\ .navbar {font-size: small}\n\
.feed{font-size: medium}\n\ .feed{font-size: medium}\n\
.small{font-size: small; padding-right: 8%}\n' .small{font-size: small;padding-right: 8pt}\n\
.text{padding-right: 8pt}\n\
p{text-indent: 0cm}\n\
div#content{padding-right: 10pt}'
INDEXES = [ INDEXES = [
(u'\u65b0\u95fb\u56fe\u7247', u'http://www.zaobao.com/photoweb/photoweb_idx.shtml') (u'\u65b0\u95fb\u56fe\u7247', u'http://www.zaobao.com/photoweb/photoweb_idx.shtml')
@ -51,27 +54,35 @@ class ZAOBAO(BasicNewsRecipe):
DESC_SENSE = u'\u8054\u5408\u65e9\u62a5\u7f51' DESC_SENSE = u'\u8054\u5408\u65e9\u62a5\u7f51'
feeds = [ feeds = [
(u'\u5373\u65f6\u62a5\u9053', u'http://realtime.zaobao.com/news.xml'), (u'\u5373\u65f6\u62a5\u9053', u'http://realtime.zaobao.com/news.xml'),
(u'\u4e2d\u56fd\u65b0\u95fb', u'http://www.zaobao.com/zg/zg.xml'), (u'\u4e2d\u56fd\u65b0\u95fb', u'http://www.zaobao.com/zg/zg.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.zaobao.com/gj/gj.xml'), (u'\u56fd\u9645\u65b0\u95fb', u'http://www.zaobao.com/gj/gj.xml'),
(u'\u4e16\u754c\u62a5\u520a\u6587\u8403', u'http://www.zaobao.com/wencui/wencui.xml'), (u'\u4e16\u754c\u62a5\u520a\u6587\u8403', u'http://www.zaobao.com/wencui/wencui.xml'),
(u'\u4e1c\u5357\u4e9a\u65b0\u95fb', u'http://www.zaobao.com/yx/yx.xml'), (u'\u4e1c\u5357\u4e9a\u65b0\u95fb', u'http://www.zaobao.com/yx/yx.xml'),
(u'\u65b0\u52a0\u5761\u65b0\u95fb', u'http://www.zaobao.com/sp/sp.xml'), (u'\u65b0\u52a0\u5761\u65b0\u95fb', u'http://www.zaobao.com/sp/sp.xml'),
(u'\u4eca\u65e5\u89c2\u70b9', u'http://www.zaobao.com/yl/yl.xml'), (u'\u4eca\u65e5\u89c2\u70b9', u'http://www.zaobao.com/yl/yl.xml'),
(u'\u4e2d\u56fd\u8d22\u7ecf', u'http://www.zaobao.com/cz/cz.xml'), (u'\u4e2d\u56fd\u8d22\u7ecf', u'http://www.zaobao.com/cz/cz.xml'),
(u'\u72ee\u57ce\u8d22\u7ecf', u'http://www.zaobao.com/cs/cs.xml'), (u'\u72ee\u57ce\u8d22\u7ecf', u'http://www.zaobao.com/cs/cs.xml'),
(u'\u5168\u7403\u8d22\u7ecf', u'http://www.zaobao.com/cg/cg.xml'), (u'\u5168\u7403\u8d22\u7ecf', u'http://www.zaobao.com/cg/cg.xml'),
(u'\u65e9\u62a5\u4f53\u80b2', u'http://www.zaobao.com/ty/ty.xml'), (u'\u65e9\u62a5\u4f53\u80b2', u'http://www.zaobao.com/ty/ty.xml'),
(u'\u65e9\u62a5\u526f\u520a', u'http://www.zaobao.com/fk/fk.xml'), (u'\u65e9\u62a5\u526f\u520a', u'http://www.zaobao.com/fk/fk.xml'),
] ]
def preprocess_html(self, soup):
for tag in soup.findAll(name='a'):
if tag.has_key('href'):
tag_url = tag['href']
if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
del tag['href']
return soup
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']): for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div' tag.name = 'div'
return soup return soup
def parse_feeds(self): def parse_feeds(self):
self.log.debug('ZAOBAO overrode parse_feeds()') self.log_debug(_('ZAOBAO overrode parse_feeds()'))
parsed_feeds = BasicNewsRecipe.parse_feeds(self) parsed_feeds = BasicNewsRecipe.parse_feeds(self)
for id, obj in enumerate(self.INDEXES): for id, obj in enumerate(self.INDEXES):
@ -88,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
a_title = self.tag_to_string(a) a_title = self.tag_to_string(a)
date = '' date = ''
description = '' description = ''
self.log.debug('adding %s at %s'%(a_title,a_url)) self.log_debug(_('adding %s at %s')%(a_title,a_url))
articles.append({ articles.append({
'title':a_title, 'title':a_title,
'date':date, 'date':date,
@ -97,26 +108,25 @@ class ZAOBAO(BasicNewsRecipe):
}) })
pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article, pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed, max_articles_per_feed=self.max_articles_per_feed)
log=self.log)
self.log.debug('adding %s to feed'%(title)) self.log_debug(_('adding %s to feed')%(title))
for feed in pfeeds: for feed in pfeeds:
self.log.debug('adding feed: %s'%(feed.title)) self.log_debug(_('adding feed: %s')%(feed.title))
feed.description = self.DESC_SENSE feed.description = self.DESC_SENSE
parsed_feeds.append(feed) parsed_feeds.append(feed)
for a, article in enumerate(feed): for a, article in enumerate(feed):
self.log.debug('added article %s from %s'%(article.title, article.url)) self.log_debug(_('added article %s from %s')%(article.title, article.url))
self.log.debug('added feed %s'%(feed.title)) self.log_debug(_('added feed %s')%(feed.title))
for i, feed in enumerate(parsed_feeds): for i, feed in enumerate(parsed_feeds):
# workaround a strange problem: Sometimes the xml encoding is not applied correctly by parse() # workaround a strange problem: Sometimes the xml encoding is not applied correctly by parse()
weired_encoding_detected = False weired_encoding_detected = False
if not isinstance(feed.description, unicode) and self.encoding and feed.description: if not isinstance(feed.description, unicode) and self.encoding and feed.description:
self.log.debug('Feed %s is not encoded correctly, manually replace it'%(feed.title)) self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
feed.description = feed.description.decode(self.encoding, 'replace') feed.description = feed.description.decode(self.encoding, 'replace')
elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description: elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
self.log.debug('Feed %s is strangely encoded, manually redo all'%(feed.title)) self.log_debug(_('Feed %s is weirdly encoded, manually redo all')%(feed.title))
feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace') feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
weired_encoding_detected = True weired_encoding_detected = True
@ -138,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace') article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')
if article.title == "Untitled article": if article.title == "Untitled article":
self.log.debug('Removing empty article %s from %s'%(article.title, article.url)) self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
# remove the article # remove the article
feed.articles[a:a+1] = [] feed.articles[a:a+1] = []
return parsed_feeds return parsed_feeds
View File
@ -406,3 +406,8 @@ img, object, svg|svg {
width: auto; width: auto;
height: auto; height: auto;
} }
/* These are needed because ADE renders anchors the same as links */
a { text-decoration: inherit; color: inherit; cursor: inherit }
a[href] { text-decoration: underline; color: blue; cursor: pointer }
View File
@ -40,13 +40,14 @@ class LinuxFreeze(Command):
'/usr/bin/pdftohtml', '/usr/bin/pdftohtml',
'/usr/lib/libwmflite-0.2.so.7', '/usr/lib/libwmflite-0.2.so.7',
'/usr/lib/liblcms.so.1', '/usr/lib/liblcms.so.1',
'/usr/lib/libstlport.so.5.1',
'/tmp/calibre-mount-helper', '/tmp/calibre-mount-helper',
'/usr/lib/libunrar.so', '/usr/lib/libunrar.so',
'/usr/lib/libchm.so.0', '/usr/lib/libchm.so.0',
'/usr/lib/libsqlite3.so.0', '/usr/lib/libsqlite3.so.0',
'/usr/lib/libsqlite3.so.0', '/usr/lib/libsqlite3.so.0',
'/usr/lib/libmng.so.1', '/usr/lib/libmng.so.1',
'/usr/lib/libpodofo.so.0.6.99', '/usr/lib/libpodofo.so.0.8.1',
'/lib/libz.so.1', '/lib/libz.so.1',
'/lib/libuuid.so.1', '/lib/libuuid.so.1',
'/usr/lib/libtiff.so.3', '/usr/lib/libtiff.so.3',
View File
@ -265,6 +265,9 @@ class Py2App(object):
@flush @flush
def get_local_dependencies(self, path_to_lib): def get_local_dependencies(self, path_to_lib):
for x in self.get_dependencies(path_to_lib): for x in self.get_dependencies(path_to_lib):
if x.startswith('libpodofo'):
yield x, x
continue
for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/', for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/',
'/opt/local/lib/', '/opt/local/lib/',
'/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'): '/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'):
@ -397,7 +400,7 @@ class Py2App(object):
@flush @flush
def add_podofo(self): def add_podofo(self):
info('\nAdding PoDoFo') info('\nAdding PoDoFo')
pdf = join(SW, 'lib', 'libpodofo.0.6.99.dylib') pdf = join(SW, 'lib', 'libpodofo.0.8.1.dylib')
self.install_dylib(pdf) self.install_dylib(pdf)
@flush @flush
View File
@ -162,9 +162,50 @@ SET(WANT_LIB64 FALSE)
SET(PODOFO_BUILD_SHARED TRUE) SET(PODOFO_BUILD_SHARED TRUE)
SET(PODOFO_BUILD_STATIC FALSE) SET(PODOFO_BUILD_STATIC FALSE)
cp build/podofo-0.7.0/build/src/Release/podofo.dll bin/ cp build/podofo/build/src/Release/podofo.dll bin/
cp build/podofo-0.7.0/build/src/Release/podofo.lib lib/ cp build/podofo/build/src/Release/podofo.lib lib/
cp build/podofo-0.7.0/build/src/Release/podofo.exp lib/ cp build/podofo/build/src/Release/podofo.exp lib/
cp build/podofo/build/podofo_config.h include/podofo/
cp -r build/podofo/src/* include/podofo/
The following patch was required to get it to compile:
Index: src/PdfImage.cpp
===================================================================
--- src/PdfImage.cpp (revision 1261)
+++ src/PdfImage.cpp (working copy)
@@ -627,7 +627,7 @@
long lLen = static_cast<long>(pInfo->rowbytes * height);
char* pBuffer = static_cast<char*>(malloc(sizeof(char) * lLen));
- png_bytep pRows[height];
+ png_bytepp pRows = static_cast<png_bytepp>(malloc(sizeof(png_bytep)*height));
for(int y=0; y<height; y++)
{
pRows[y] = reinterpret_cast<png_bytep>(pBuffer + (y * pInfo->rowbytes));
@@ -672,6 +672,7 @@
this->SetImageData( width, height, pInfo->bit_depth, &stream );
free(pBuffer);
+ free(pRows);
}
#endif // PODOFO_HAVE_PNG_LIB
Index: src/PdfFiltersPrivate.cpp
===================================================================
--- src/PdfFiltersPrivate.cpp (revision 1261)
+++ src/PdfFiltersPrivate.cpp (working copy)
@@ -1019,7 +1019,7 @@
/*
* Prepare for input from a memory buffer.
*/
-GLOBAL(void)
+void
jpeg_memory_src (j_decompress_ptr cinfo, const JOCTET * buffer, size_t bufsize)
{
my_src_ptr src;
ImageMagick ImageMagick
-------------- --------------
View File
@ -154,6 +154,10 @@
<CustomAction Id="LaunchApplication" BinaryKey="WixCA" <CustomAction Id="LaunchApplication" BinaryKey="WixCA"
DllEntry="WixShellExec" Impersonate="yes"/> DllEntry="WixShellExec" Impersonate="yes"/>
<InstallUISequence>
<FileCost Suppress="yes" />
</InstallUISequence>
</Product> </Product>
</Wix> </Wix>
View File
@ -30,6 +30,7 @@ mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs') mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml') mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg') mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('text/fb2+xml', '.fb2')
mimetypes.add_type('application/x-sony-bbeb', '.lrf') mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-sony-bbeb', '.lrx') mimetypes.add_type('application/x-sony-bbeb', '.lrx')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx') mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
@ -43,6 +44,7 @@ mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
mimetypes.add_type('application/x-mobipocket-ebook', '.azw') mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
mimetypes.add_type('application/x-cbz', '.cbz') mimetypes.add_type('application/x-cbz', '.cbz')
mimetypes.add_type('application/x-cbr', '.cbr') mimetypes.add_type('application/x-cbr', '.cbr')
mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
mimetypes.add_type('image/wmf', '.wmf') mimetypes.add_type('image/wmf', '.wmf')
guess_type = mimetypes.guess_type guess_type = mimetypes.guess_type
import cssutils import cssutils
@ -340,13 +342,6 @@ def detect_ncpus():
return ans return ans
def launch(path_or_url):
from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QDesktopServices
if os.path.exists(path_or_url):
path_or_url = 'file:'+path_or_url
QDesktopServices.openUrl(QUrl(path_or_url))
relpath = os.path.relpath relpath = os.path.relpath
_spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE) _spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)
def english_sort(x, y): def english_sort(x, y):
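The two new registrations above (.fb2 and .kobo) take effect as soon as this module is imported, so calibre's guess_type wrapper (plain mimetypes.guess_type) resolves them; the file names below are examples only:
from calibre import guess_type
print guess_type('story.fb2')[0]    # 'text/fb2+xml'
print guess_type('book.kobo')[0]    # 'application/x-koboreader-ebook'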
View File
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.7.2' __version__ = '0.7.8'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re
View File
@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
from calibre.constants import numeric_version from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin): class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP' name = 'HTML to ZIP'
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -30,6 +31,7 @@ every time you add an HTML file to the library.\
with TemporaryDirectory('_plugin_html2zip') as tdir: with TemporaryDirectory('_plugin_html2zip') as tdir:
recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)] recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
if self.site_customization and self.site_customization.strip(): if self.site_customization and self.site_customization.strip():
recs.append(['input_encoding', self.site_customization.strip(), recs.append(['input_encoding', self.site_customization.strip(),
OptionRecommendation.HIGH]) OptionRecommendation.HIGH])
@ -81,7 +83,9 @@ class PML2PMLZ(FileTypePlugin):
return of.name return of.name
# }}}
# Metadata reader plugins {{{
class ComicMetadataReader(MetadataReaderPlugin): class ComicMetadataReader(MetadataReaderPlugin):
name = 'Read comic metadata' name = 'Read comic metadata'
@ -319,7 +323,9 @@ class ZipMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype): def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.zip import get_metadata from calibre.ebooks.metadata.zip import get_metadata
return get_metadata(stream) return get_metadata(stream)
# }}}
# Metadata writer plugins {{{
class EPUBMetadataWriter(MetadataWriterPlugin): class EPUBMetadataWriter(MetadataWriterPlugin):
@ -395,6 +401,7 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.topaz import set_metadata from calibre.ebooks.metadata.topaz import set_metadata
set_metadata(stream, mi) set_metadata(stream, mi)
# }}}
from calibre.ebooks.comic.input import ComicInput from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.epub.input import EPUBInput
@ -436,7 +443,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \ from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \ POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
BOOQ, ELONEX, POCKETBOOK301 BOOQ, ELONEX, POCKETBOOK301, MENTOR
from calibre.devices.iliad.driver import ILIAD from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK from calibre.devices.jetbook.driver import JETBOOK
@ -444,7 +451,7 @@ from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK from calibre.devices.nook.driver import NOOK
from calibre.devices.prs505.driver import PRS505 from calibre.devices.prs505.driver import PRS505
from calibre.devices.android.driver import ANDROID, S60 from calibre.devices.android.driver import ANDROID, S60
from calibre.devices.nokia.driver import N770, N810, E71X from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52 from calibre.devices.eslick.driver import ESLICK, EBK52
from calibre.devices.nuut2.driver import NUUT2 from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY from calibre.devices.iriver.driver import IRIVER_STORY
@ -453,7 +460,7 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS
from calibre.devices.sne.driver import SNE from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO from calibre.devices.kobo.driver import KOBO
@ -461,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.library.catalog import CSV_XML, EPUB_MOBI from calibre.library.catalog import CSV_XML, EPUB_MOBI
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI] LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]
plugins += [ plugins += [
ComicInput, ComicInput,
EPUBInput, EPUBInput,
@ -499,7 +509,6 @@ plugins += [
] ]
# Order here matters. The first matched device is the one used. # Order here matters. The first matched device is the one used.
plugins += [ plugins += [
ITUNES,
HANLINV3, HANLINV3,
HANLINV5, HANLINV5,
BLACKBERRY, BLACKBERRY,
@ -520,6 +529,7 @@ plugins += [
S60, S60,
N770, N770,
E71X, E71X,
E52,
N810, N810,
COOL_ER, COOL_ER,
ESLICK, ESLICK,
@ -550,6 +560,10 @@ plugins += [
AZBOOKA, AZBOOKA,
FOLDER_DEVICE_FOR_CONFIG, FOLDER_DEVICE_FOR_CONFIG,
AVANT, AVANT,
MENTOR,
SWEEX,
PDNOVEL,
ITUNES,
] ]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
View File

@ -36,7 +36,7 @@ class Plugin(_Plugin):
self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name) self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num) self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
# Input profiles {{{
class InputProfile(Plugin): class InputProfile(Plugin):
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -218,6 +218,8 @@ input_profiles = [InputProfile, SonyReaderInput, SonyReader300Input,
input_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower())) input_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
# }}}
class OutputProfile(Plugin): class OutputProfile(Plugin):
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -237,11 +239,12 @@ class OutputProfile(Plugin):
# If True the MOBI renderer on the device supports MOBI indexing # If True the MOBI renderer on the device supports MOBI indexing
supports_mobi_indexing = False supports_mobi_indexing = False
# Device supports displaying a nested TOC
supports_nested_toc = True
# If True output should be optimized for a touchscreen interface # If True output should be optimized for a touchscreen interface
touchscreen = False touchscreen = False
touchscreen_news_css = ''
# A list of extra (beyond CSS 2.1) modules supported by the device
# Format is a cssutils profile dictionary (see iPad for example)
extra_css_modules = []
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):
@ -256,8 +259,151 @@ class iPadOutput(OutputProfile):
screen_size = (768, 1024) screen_size = (768, 1024)
comic_screen_size = (768, 1024) comic_screen_size = (768, 1024)
dpi = 132.0 dpi = 132.0
supports_nested_toc = False extra_css_modules = [
{
'name':'webkit',
'props': { '-webkit-border-bottom-left-radius':'{length}',
'-webkit-border-bottom-right-radius':'{length}',
'-webkit-border-top-left-radius':'{length}',
'-webkit-border-top-right-radius':'{length}',
'-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit',
},
'macros': {'border-width': '{length}|medium|thick|thin'}
}
]
touchscreen = True touchscreen = True
# touchscreen_news_css {{{
touchscreen_news_css = u'''
/* hr used in articles */
.article_articles_list {
width:18%;
}
.article_link {
color: #593f29;
font-style: italic;
}
.article_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:32%;
}
.article_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:32%;
}
.article_sections_list {
width:18%;
}
.articles_link {
font-weight: bold;
}
.sections_link {
font-weight: bold;
}
.caption_divider {
border:#ccc 1px solid;
}
.touchscreen_navbar {
background:#c3bab2;
border:#ccc 0px solid;
border-collapse:separate;
border-spacing:1px;
margin-left: 5%;
margin-right: 5%;
width: 90%;
-webkit-border-radius:4px;
}
.touchscreen_navbar td {
background:#fff;
font-family:Helvetica;
font-size:80%;
/* UI touchboxes use 8px padding */
padding: 6px;
text-align:center;
}
.touchscreen_navbar td a:link {
color: #593f29;
text-decoration: none;
}
/* Index formatting */
.publish_date {
text-align:center;
}
.divider {
border-bottom:1em solid white;
border-top:1px solid gray;
}
hr.caption_divider {
border-color:black;
border-style:solid;
border-width:1px;
}
/* Feed summary formatting */
.article_summary {
display:inline-block;
}
.feed {
font-family:sans-serif;
font-weight:bold;
font-size:larger;
}
.feed_link {
font-style: italic;
}
.feed_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:40%;
}
.feed_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:40%;
}
.feed_title {
text-align: center;
font-size: 160%;
}
.feed_up {
font-weight: bold;
width:20%;
}
.summary_headline {
font-weight:bold;
text-align:left;
}
.summary_byline {
text-align:left;
font-family:monospace;
}
.summary_text {
text-align:left;
}
'''
# }}}
class SonyReaderOutput(OutputProfile): class SonyReaderOutput(OutputProfile):
@ -279,6 +425,7 @@ class KoboReaderOutput(OutputProfile):
description = _('This profile is intended for the Kobo Reader.') description = _('This profile is intended for the Kobo Reader.')
screen_size = (590, 775) screen_size = (590, 775)
comic_screen_size = (540, 718)
dpi = 168.451 dpi = 168.451
fbase = 12 fbase = 12
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24] fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
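A hedged sketch of how one entry from the new extra_css_modules list could be registered with cssutils, whose profile registry accepts exactly the (name, properties, macros) triple mirrored by the dictionary keys in the iPad profile above. How the conversion pipeline actually performs this registration is not part of this diff, so treat the call below as illustrative:
import cssutils
webkit = {
    'name': 'webkit',
    'props': {'-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit'},
    'macros': {'border-width': '{length}|medium|thick|thin'},
}
cssutils.profile.addProfile(webkit['name'], webkit['props'], webkit['macros'])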
View File
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.fetch import MetadataSource from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \ from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser, prefs plugin_dir, OptionParser, prefs
from calibre.ebooks.epub.fix import ePubFixer
platform = 'linux' platform = 'linux'
@ -151,13 +152,13 @@ def reread_filetype_plugins():
def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'): def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
occasion = {'import':_on_import, 'preprocess':_on_preprocess, occasion_plugins = {'import':_on_import, 'preprocess':_on_preprocess,
'postprocess':_on_postprocess}[occasion] 'postprocess':_on_postprocess}[occasion]
customization = config['plugin_customization'] customization = config['plugin_customization']
if ft is None: if ft is None:
ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '') ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '')
nfp = path_to_file nfp = path_to_file
for plugin in occasion.get(ft, []): for plugin in occasion_plugins.get(ft, []):
if is_disabled(plugin): if is_disabled(plugin):
continue continue
plugin.site_customization = customization.get(plugin.name, '') plugin.site_customization = customization.get(plugin.name, '')
@ -194,7 +195,6 @@ def plugin_customization(plugin):
# }}} # }}}
# Input/Output profiles {{{ # Input/Output profiles {{{
def input_profiles(): def input_profiles():
for plugin in _initialized_plugins: for plugin in _initialized_plugins:
@ -444,6 +444,14 @@ def device_plugins(): # {{{
yield plugin yield plugin
# }}} # }}}
# epub fixers {{{
def epub_fixers():
for plugin in _initialized_plugins:
if isinstance(plugin, ePubFixer):
if not is_disabled(plugin):
if platform in plugin.supported_platforms:
yield plugin
# }}}
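The epub_fixers() helper added above follows the same pattern as device_plugins(): it yields each enabled ePubFixer plugin supported on the current platform. A trivial illustrative use that only lists the registered fixers (the Unmanifested and Epubcheck plugins added to the builtin plugin list) without running them:
from calibre.customize.ui import epub_fixers
for fixer in epub_fixers():
    print fixer.name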
# Initialize plugins {{{ # Initialize plugins {{{
View File
@ -34,6 +34,12 @@ class ANDROID(USBMS):
# Acer # Acer
0x502 : { 0x3203 : [0x0100]}, 0x502 : { 0x3203 : [0x0100]},
# Dell
0x413c : { 0xb007 : [0x0100]},
# Eken?
0x040d : { 0x0851 : [0x0001]},
} }
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books'] EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -42,11 +48,12 @@ class ANDROID(USBMS):
EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN) EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG'] 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD',
'PROD_GT-I9000'] 'GT-I9000', 'FILE-STOR_GADGET']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'PROD_GT-I9000_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD',
'FILE-STOR_GADGET']
OSX_MAIN_MEM = 'HTC Android Phone Media' OSX_MAIN_MEM = 'HTC Android Phone Media'
File diff suppressed because it is too large
View File
@ -186,6 +186,15 @@ class BOOQ(EB600):
WINDOWS_MAIN_MEM = 'EB600' WINDOWS_MAIN_MEM = 'EB600'
WINDOWS_CARD_A_MEM = 'EB600' WINDOWS_CARD_A_MEM = 'EB600'
class MENTOR(EB600):
name = 'Astak Mentor EB600'
gui_name = 'Mentor'
description = _('Communicate with the Astak Mentor EB600')
FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'pdf', 'txt']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MENTOR'
class ELONEX(EB600): class ELONEX(EB600):
name = 'Elonex 600EB' name = 'Elonex 600EB'
View File
@ -66,7 +66,7 @@ class FOLDER_DEVICE(USBMS):
detected_device=None): detected_device=None):
pass pass
def disconnect_from_folder(self): def unmount_device(self):
self._main_prefix = '' self._main_prefix = ''
self.is_connected = False self.is_connected = False
View File
@ -106,9 +106,11 @@ class BOOX(HANLINV3):
description = _('Communicate with the BOOX eBook reader.') description = _('Communicate with the BOOX eBook reader.')
author = 'Jesus Manuel Marinho Valcarce' author = 'Jesus Manuel Marinho Valcarce'
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
METADATA_CACHE = '.metadata.calibre'
# Ordered list of supported formats # Ordered list of supported formats
FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi', 'prc', 'chm'] FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi',
'prc', 'chm', 'doc']
VENDOR_ID = [0x0525] VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5] PRODUCT_ID = [0xa4a5]
View File
@ -24,7 +24,7 @@ class N516(USBMS):
VENDOR_ID = [0x0525] VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5] PRODUCT_ID = [0xa4a5]
BCD = [0x323, 0x326] BCD = [0x323, 0x326, 0x327]
VENDOR_NAME = 'INGENIC' VENDOR_NAME = 'INGENIC'
WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE' WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE'
View File
@ -59,7 +59,7 @@ class DevicePlugin(Plugin):
return cls.__name__ return cls.__name__
return cls.name return cls.name
# Device detection {{{
def test_bcd_windows(self, device_id, bcd): def test_bcd_windows(self, device_id, bcd):
if bcd is None or len(bcd) == 0: if bcd is None or len(bcd) == 0:
return True return True
@ -152,6 +152,7 @@ class DevicePlugin(Plugin):
return True, dev return True, dev
return False, None return False, None
# }}}
def reset(self, key='-1', log_packets=False, report_progress=None, def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None) : detected_device=None) :
@ -372,14 +373,12 @@ class DevicePlugin(Plugin):
@classmethod @classmethod
def settings(cls): def settings(cls):
''' '''
Should return an opts object. The opts object should have one attribute Should return an opts object. The opts object should have at least one attribute
`format_map` which is an ordered list of formats for the device. `format_map` which is an ordered list of formats for the device.
''' '''
raise NotImplementedError() raise NotImplementedError()
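A minimal sketch of the relaxed settings() contract documented above (an opts object exposing at least format_map); the class names here are hypothetical, and real drivers normally inherit a full implementation from the USBMS/DeviceConfig machinery rather than writing one by hand:
class _Opts(object):                    # hypothetical stand-in for the opts object
    def __init__(self, format_map):
        self.format_map = format_map    # ordered list of formats, most preferred first
class ExampleDevice(DevicePlugin):      # hypothetical driver
    FORMATS = ['epub', 'pdf', 'txt']
    @classmethod
    def settings(cls):
        return _Opts(list(cls.FORMATS))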
class BookList(list): class BookList(list):
''' '''
A list of books. Each Book object must have the fields: A list of books. Each Book object must have the fields:
View File
@ -213,7 +213,7 @@ class KINDLE_DX(KINDLE2):
PRODUCT_ID = [0x0003] PRODUCT_ID = [0x0003]
BCD = [0x0100] BCD = [0x0100]
class Bookmark(): class Bookmark(): # {{{
''' '''
A simple class fetching bookmark data A simple class fetching bookmark data
Kindle-specific Kindle-specific
@ -429,6 +429,7 @@ class Bookmark():
entries, = unpack('>I', data[9:13]) entries, = unpack('>I', data[9:13])
current_entry = 0 current_entry = 0
e_base = 0x0d e_base = 0x0d
self.pdf_page_offset = 0
while current_entry < entries: while current_entry < entries:
''' '''
location, = unpack('>I', data[e_base+2:e_base+6]) location, = unpack('>I', data[e_base+2:e_base+6])
@ -516,3 +517,6 @@ class Bookmark():
else: else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension print "unsupported bookmark_extension: %s" % self.bookmark_extension
# }}}
View File
@ -0,0 +1,116 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com>'
'''
'''
import os
import re
import time
from calibre.ebooks.metadata import MetaInformation
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring
class Book(MetaInformation):
BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
'title_sort', 'comments', 'category', 'publisher', 'series',
'series_index', 'rating', 'isbn', 'language', 'application_id',
'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
'uuid',
]
def __init__(self, prefix, lpath, title, authors, mime, date, ContentType, thumbnail_name, other=None):
MetaInformation.__init__(self, '')
self.device_collections = []
self._new_book = False
self.path = os.path.join(prefix, lpath)
if os.sep == '\\':
self.path = self.path.replace('/', '\\')
self.lpath = lpath.replace('\\', '/')
else:
self.lpath = lpath
self.title = title
if not authors:
self.authors = ['']
else:
self.authors = [authors]
self.mime = mime
try:
self.size = os.path.getsize(self.path)
except OSError:
self.size = 0
try:
if ContentType == '6':
self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
else:
self.datetime = time.gmtime(os.path.getctime(self.path))
except:
self.datetime = time.gmtime()
if thumbnail_name is not None:
self.thumbnail = ImageWrapper(thumbnail_name)
self.tags = []
if other:
self.smart_update(other)
def __eq__(self, other):
return self.path == getattr(other, 'path', None)
@dynamic_property
def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
match = re.search(r'_(\d+)$', self.lpath.rpartition('.')[0])
if match:
return int(match.group(1))
return None
return property(fget=fget, doc=doc)
@dynamic_property
def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
return property(doc=doc, fget=fget)
@dynamic_property
def thumbnail(self):
return None
def smart_update(self, other):
'''
Merge the information in C{other} into self. In case of conflicts, the information
in C{other} takes precedence, unless the information in C{other} is NULL.
'''
MetaInformation.smart_update(self, other)
for attr in self.BOOK_ATTRS:
if hasattr(other, attr):
val = getattr(other, attr, None)
setattr(self, attr, val)
def to_json(self):
json = {}
for attr in self.JSON_ATTRS:
val = getattr(self, attr)
if isbytestring(val):
enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
val = val.decode(enc, 'replace')
elif isinstance(val, (list, tuple)):
val = [x.decode(preferred_encoding, 'replace') if
isbytestring(x) else x for x in val]
json[attr] = val
return json
class ImageWrapper(object):
def __init__(self, image_path):
self.image_path = image_path
View File
@ -2,17 +2,26 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os
import sqlite3 as sqlite
from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book
from calibre.devices.kobo.books import ImageWrapper
from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
from calibre import prints
class KOBO(USBMS): class KOBO(USBMS):
name = 'Kobo Reader Device Interface' name = 'Kobo Reader Device Interface'
gui_name = 'Kobo Reader' gui_name = 'Kobo Reader'
description = _('Communicate with the Kobo Reader') description = _('Communicate with the Kobo Reader')
author = 'Kovid Goyal' author = 'Timothy Legge and Kovid Goyal'
version = (1, 0, 4)
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
@ -29,3 +38,320 @@ class KOBO(USBMS):
EBOOK_DIR_MAIN = '' EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True SUPPORTS_SUB_DIRS = True
def initialize(self):
USBMS.initialize(self)
self.book_class = Book
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
dummy_bl = BookList(None, None, None)
if oncard == 'carda' and not self._card_a_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
elif oncard == 'cardb' and not self._card_b_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
elif oncard and oncard != 'carda' and oncard != 'cardb':
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
prefix = self._card_a_prefix if oncard == 'carda' else \
self._card_b_prefix if oncard == 'cardb' \
else self._main_prefix
# get the metadata cache
bl = self.booklist_class(oncard, prefix, self.settings)
need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
# make a dict cache of paths so the lookup in the loop below is faster.
bl_cache = {}
for idx,b in enumerate(bl):
bl_cache[b.lpath] = idx
def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID):
changed = False
# if path_to_ext(path) in self.FORMATS:
try:
lpath = path.partition(self.normalize_path(prefix))[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
lpath = lpath.replace('\\', '/')
# print "LPATH: " + lpath
path = self.normalize_path(path)
# print "Normalized FileName: " + path
idx = bl_cache.get(lpath, None)
if idx is not None:
if ImageID is not None:
imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
#print "Image name Normalized: " + imagename
if imagename is not None:
bl[idx].thumbnail = ImageWrapper(imagename)
bl_cache[lpath] = None
if ContentType != '6':
if self.update_metadata_item(bl[idx]):
# print 'update_metadata_item returned true'
changed = True
else:
book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
# print 'Update booklist'
if bl.add_book(book, replace_metadata=False):
changed = True
except: # Probably a path encoding error
import traceback
traceback.print_exc()
return changed
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
cursor = connection.cursor()
#query = 'select count(distinct volumeId) from volume_shortcovers'
#cursor.execute(query)
#for row in (cursor):
# numrows = row[0]
#cursor.close()
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID from content where BookID is Null'
cursor.execute (query)
changed = False
for i, row in enumerate(cursor):
# self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
path = self.path_from_contentid(row[3], row[5], oncard)
mime = mime_type_ext(path_to_ext(row[3]))
if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
# print "shortbook: " + path
elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
if changed:
need_sync = True
cursor.close()
connection.close()
# Remove books that are no longer in the filesystem. Cache contains
# indices into the booklist if book not in filesystem, None otherwise
# Do the operation in reverse order so indices remain valid
for idx in sorted(bl_cache.itervalues(), reverse=True):
if idx is not None:
need_sync = True
del bl[idx]
#print "count found in cache: %d, count of files in metadata: %d, need_sync: %s" % \
# (len(bl_cache), len(bl), need_sync)
if need_sync: #self.count_found_in_bl != len(bl) or need_sync:
if oncard == 'cardb':
self.sync_booklists((None, None, bl))
elif oncard == 'carda':
self.sync_booklists((None, bl, None))
else:
self.sync_booklists((bl, None, None))
self.report_progress(1.0, _('Getting list of books on device...'))
return bl
def delete_via_sql(self, ContentID, ContentType):
# Delete Order:
# 1) shortcover_page
# 2) volume_shortcovers
# 3) content
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
cursor = connection.cursor()
t = (ContentID,)
cursor.execute('select ImageID from content where ContentID = ?', t)
ImageID = None
for row in cursor:
# First get the ImageID to delete the images
ImageID = row[0]
cursor.close()
cursor = connection.cursor()
if ContentType == 6:
# Delete the shortcover_pages first
cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)
#Delete the volume_shortcovers second
cursor.execute('delete from volume_shortcovers where volumeid = ?', t)
# Delete the chapters associated with the book next
t = (ContentID,ContentID,)
cursor.execute('delete from content where BookID = ? or ContentID = ?', t)
connection.commit()
cursor.close()
if ImageID == None:
print "Error condition ImageID was not found"
print "You likely tried to delete a book that the kobo has not yet added to the database"
connection.close()
# If all this succeeds we need to delete the images files via the ImageID
return ImageID
def delete_images(self, ImageID):
if ImageID != None:
path_prefix = '.kobo/images/'
path = self._main_prefix + path_prefix + ImageID
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed',)
for ending in file_endings:
fpath = path + ending
fpath = self.normalize_path(fpath)
if os.path.exists(fpath):
# print 'Image File Exists: ' + fpath
os.unlink(fpath)
def delete_books(self, paths, end_session=True):
for i, path in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
path = self.normalize_path(path)
# print "Delete file normalized path: " + path
extension = os.path.splitext(path)[1]
if extension == '.kobo':
# Kobo books do not have book files. They do have some images though
#print "kobo book"
ContentType = 6
ContentID = self.contentid_from_path(path, ContentType)
elif extension == '.pdf' or extension == '.epub':
# print "ePub or pdf"
ContentType = 16
#print "Path: " + path
ContentID = self.contentid_from_path(path, ContentType)
# print "ContentID: " + ContentID
else: # if extension == '.html' or extension == '.txt':
ContentType = 999 # Yet another hack: to get around Kobo changing how ContentID is stored
ContentID = self.contentid_from_path(path, ContentType)
ImageID = self.delete_via_sql(ContentID, ContentType)
#print " We would now delete the Images for" + ImageID
self.delete_images(ImageID)
if os.path.exists(path):
# Delete the ebook
# print "Delete the ebook: " + path
os.unlink(path)
filepath = os.path.splitext(path)[0]
for ext in self.DELETE_EXTS:
if os.path.exists(filepath + ext):
# print "Filename: " + filename
os.unlink(filepath + ext)
if os.path.exists(path + ext):
# print "Filename: " + filename
os.unlink(path + ext)
if self.SUPPORTS_SUB_DIRS:
try:
# print "removed"
os.removedirs(os.path.dirname(path))
except:
pass
self.report_progress(1.0, _('Removing books from device...'))
def remove_books_from_metadata(self, paths, booklists):
for i, path in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...'))
for bl in booklists:
for book in bl:
#print "Book Path: " + book.path
if path.endswith(book.path):
#print " Remove: " + book.path
bl.remove_book(book)
self.report_progress(1.0, _('Removing books from device metadata listing...'))
def add_books_to_metadata(self, locations, metadata, booklists):
metadata = iter(metadata)
for i, location in enumerate(locations):
self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
info = metadata.next()
blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0
# Extract the correct prefix from the pathname. To do this correctly,
# we must ensure that both the prefix and the path are normalized
# so that the comparison will work. Book's __init__ will fix up
# lpath, so we don't need to worry about that here.
path = self.normalize_path(location[0])
if self._main_prefix:
prefix = self._main_prefix if \
path.startswith(self.normalize_path(self._main_prefix)) else None
if not prefix and self._card_a_prefix:
prefix = self._card_a_prefix if \
path.startswith(self.normalize_path(self._card_a_prefix)) else None
if not prefix and self._card_b_prefix:
prefix = self._card_b_prefix if \
path.startswith(self.normalize_path(self._card_b_prefix)) else None
if prefix is None:
prints('in add_books_to_metadata. Prefix is None!', path,
self._main_prefix)
continue
#print "Add book to metatdata: "
#print "prefix: " + prefix
lpath = path.partition(prefix)[2]
if lpath.startswith('/') or lpath.startswith('\\'):
lpath = lpath[1:]
#print "path: " + lpath
#book = self.book_class(prefix, lpath, other=info)
lpath = self.normalize_path(prefix + lpath)
book = Book(prefix, lpath, '', '', '', '', '', '', other=info)
if book.size is None:
book.size = os.stat(self.normalize_path(path)).st_size
booklists[blist].add_book(book, replace_metadata=True)
self.report_progress(1.0, _('Adding books to device metadata listing...'))
def contentid_from_path(self, path, ContentType):
if ContentType == 6:
ContentID = os.path.splitext(path)[0]
# Remove the prefix on the file. it could be either
ContentID = ContentID.replace(self._main_prefix, '')
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, '')
elif ContentType == 999: # HTML Files
ContentID = path
ContentID = ContentID.replace(self._main_prefix, "/mnt/onboard/")
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, "/mnt/sd/")
else: # ContentType = 16
ContentID = path
ContentID = ContentID.replace(self._main_prefix, "file:///mnt/onboard/")
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, "file:///mnt/sd/")
ContentID = ContentID.replace("\\", '/')
return ContentID
def path_from_contentid(self, ContentID, ContentType, oncard):
path = ContentID
if oncard == 'cardb':
print 'path from_contentid cardb'
elif oncard == 'carda':
path = path.replace("file:///mnt/sd/", self._card_a_prefix)
# print "SD Card: " + filename
else:
if ContentType == "6":
# This is a hack as the kobo files do not exist
# but the path is required to make a unique id
# for calibre's reference
path = self._main_prefix + path + '.kobo'
# print "Path: " + path
else:
# if path.startswith("file:///mnt/onboard/"):
path = path.replace("file:///mnt/onboard/", self._main_prefix)
path = path.replace("/mnt/onboard/", self._main_prefix)
# print "Internal: " + filename
return path
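A worked example of the ContentID/path round trip implemented by contentid_from_path() and path_from_contentid() above, for a side-loaded ePub (ContentType 16) in main memory; the mount point is illustrative:
#   self._main_prefix                     -> '/media/KOBOeReader/'
#   path on disk                          -> '/media/KOBOeReader/eBooks/Author/Title.epub'
#   contentid_from_path(path, 16)         -> 'file:///mnt/onboard/eBooks/Author/Title.epub'
#   path_from_contentid(ContentID, 16, None) applies the reverse replacement, recovering
#   the on-disk path, which is how books() resolves rows read from KoboReader.sqlite.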
View File
@ -49,3 +49,41 @@ class AVANT(USBMS):
EBOOK_DIR_MAIN = '' EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True SUPPORTS_SUB_DIRS = True
class SWEEX(USBMS):
name = 'Sweex Device Interface'
gui_name = 'Sweex'
description = _('Communicate with the Sweex MM300')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
# Ordered list of supported formats
FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]
BCD = [0x0319]
VENDOR_NAME = 'SWEEX'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True
class PDNOVEL(USBMS):
name = 'Pandigital Novel device interface'
gui_name = 'PD Novel'
description = _('Communicate with the Pandigital Novel')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'linux', 'osx']
FORMATS = ['epub', 'pdf']
VENDOR_ID = [0x18d1]
PRODUCT_ID = [0xb004]
BCD = [0x224]
VENDOR_NAME = 'ANDROID'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '__UMS_COMPOSITE'
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = False
View File
@ -67,3 +67,24 @@ class E71X(USBMS):
VENDOR_NAME = 'NOKIA' VENDOR_NAME = 'NOKIA'
WINDOWS_MAIN_MEM = 'S60' WINDOWS_MAIN_MEM = 'S60'
class E52(USBMS):
name = 'Nokia E52 device interface'
gui_name = 'Nokia E52'
description = _('Communicate with the Nokia E52')
author = 'David Ignjic'
supported_platforms = ['windows', 'linux', 'osx']
VENDOR_ID = [0x421]
PRODUCT_ID = [0x1CD]
BCD = [0x100]
FORMATS = ['mobi', 'prc']
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = True
VENDOR_NAME = 'NOKIA'
WINDOWS_MAIN_MEM = 'S60'
View File
@ -99,7 +99,7 @@ class PRS505(USBMS):
if self._card_b_prefix is not None: if self._card_b_prefix is not None:
if not write_cache(self._card_b_prefix): if not write_cache(self._card_b_prefix):
self._card_b_prefix = None self._card_b_prefix = None
self.booklist_class.rebuild_collections = self.rebuild_collections
def get_device_information(self, end_session=True): def get_device_information(self, end_session=True):
return (self.gui_name, '', '', '') return (self.gui_name, '', '', '')
@ -145,7 +145,7 @@ class PRS505(USBMS):
blists[i] = booklists[i] blists[i] = booklists[i]
opts = self.settings() opts = self.settings()
if opts.extra_customization: if opts.extra_customization:
collections = [x.strip() for x in collections = [x.lower().strip() for x in
opts.extra_customization.split(',')] opts.extra_customization.split(',')]
else: else:
collections = [] collections = []
@ -156,4 +156,10 @@ class PRS505(USBMS):
USBMS.sync_booklists(self, booklists, end_session=end_session) USBMS.sync_booklists(self, booklists, end_session=end_session)
debug_print('PRS505: finished sync_booklists') debug_print('PRS505: finished sync_booklists')
def rebuild_collections(self, booklist, oncard):
debug_print('PRS505: started rebuild_collections on card', oncard)
c = self.initialize_XML_cache()
c.rebuild_collections(booklist, {'carda':1, 'cardb':2}.get(oncard, 0))
c.write()
debug_print('PRS505: finished rebuild_collections')
View File
@@ -6,10 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, time
-from pprint import pprint
 from base64 import b64decode
 from uuid import uuid4
 from lxml import etree

 from calibre import prints, guess_type
@@ -62,8 +60,7 @@ class XMLCache(object):
     def __init__(self, paths, prefixes, use_author_sort):
         if DEBUG:
-            debug_print('Building XMLCache...')
-            pprint(paths)
+            debug_print('Building XMLCache...', paths)
         self.paths = paths
         self.prefixes = prefixes
         self.use_author_sort = use_author_sort
@@ -147,39 +144,73 @@ class XMLCache(object):
                     if title+str(i) not in seen:
                         title = title+str(i)
                         playlist.set('title', title)
+                        seen.add(title)
                         break
                 else:
                     seen.add(title)

-    def get_playlist_map(self):
-        debug_print('Start get_playlist_map')
-        ans = {}
-        self.ensure_unique_playlist_titles()
-        self.prune_empty_playlists()
-        debug_print('get_playlist_map loop')
-        for i, root in self.record_roots.items():
-            debug_print('get_playlist_map loop', i)
-            id_map = self.build_id_map(root)
-            ans[i] = []
-            for playlist in root.xpath('//*[local-name()="playlist"]'):
-                items = []
-                for item in playlist:
-                    id_ = item.get('id', None)
-                    record = id_map.get(id_, None)
-                    if record is not None:
-                        items.append(record)
-                ans[i].append((playlist.get('title'), items))
-        debug_print('end get_playlist_map')
-        return ans
+    def build_id_playlist_map(self, bl_index):
+        '''
+        Return a map of the collections in books: {lpaths: [collection names]}
+        '''
+        debug_print('Start build_id_playlist_map')
+        self.ensure_unique_playlist_titles()
+        debug_print('after ensure_unique_playlist_titles')
+        self.prune_empty_playlists()
+        debug_print('after cleaning playlists')
+        root = self.record_roots[bl_index]
+        if root is None:
+            return
+        id_map = self.build_id_map(root)
+        playlist_map = {}
+        # foreach playlist, get the lpaths for the ids in it, then add to dict
+        for playlist in root.xpath('//*[local-name()="playlist"]'):
+            name = playlist.get('title')
+            if name is None:
+                debug_print('build_id_playlist_map: unnamed playlist!')
+                continue
+            for item in playlist:
+                # translate each id into its lpath
+                id_ = item.get('id', None)
+                if id_ is None:
+                    debug_print('build_id_playlist_map: id_ is None!')
+                    continue
+                bk = id_map.get(id_, None)
+                if bk is None:
+                    debug_print('build_id_playlist_map: book is None!', id_)
+                    continue
+                lpath = bk.get('path', None)
+                if lpath is None:
+                    debug_print('build_id_playlist_map: lpath is None!', id_)
+                    continue
+                if lpath not in playlist_map:
+                    playlist_map[lpath] = []
+                playlist_map[lpath].append(name)
+        debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
+        return playlist_map
+
+    def reset_existing_playlists_map(self):
+        '''
+        Call this method before calling get_or_create_playlist in the context of
+        a given job. Call it again after deleting any playlists. The current
+        implementation adds all new playlists before deleting any, so that
+        constraint is respected.
+        '''
+        self._playlist_to_playlist_id_map = {}

     def get_or_create_playlist(self, bl_idx, title):
+        # maintain a private map of playlists to their ids. Don't check if it
+        # exists, because reset_existing_playlist_map must be called before it
+        # is used to ensure that deleted playlists are taken into account
         root = self.record_roots[bl_idx]
-        for playlist in root.xpath('//*[local-name()="playlist"]'):
-            if playlist.get('title', None) == title:
-                return playlist
-        if DEBUG:
-            debug_print('Creating playlist:', title)
+        if bl_idx not in self._playlist_to_playlist_id_map:
+            self._playlist_to_playlist_id_map[bl_idx] = {}
+            for playlist in root.xpath('//*[local-name()="playlist"]'):
+                pl_title = playlist.get('title', None)
+                if pl_title is not None:
+                    self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
+        if title in self._playlist_to_playlist_id_map[bl_idx]:
+            return self._playlist_to_playlist_id_map[bl_idx][title]
+        debug_print('Creating playlist:', title)
         ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
                 nsmap=root.nsmap, attrib={
                     'uuid' : uuid(),
@@ -188,12 +219,12 @@ class XMLCache(object):
                     'sourceid': '1'
                     })
         root.append(ans)
+        self._playlist_to_playlist_id_map[bl_idx][title] = ans
         return ans
     # }}}

     def fix_ids(self): # {{{
-        if DEBUG:
-            debug_print('Running fix_ids()')
+        debug_print('Running fix_ids()')

         def ensure_numeric_ids(root):
             idmap = {}
@@ -251,7 +282,9 @@ class XMLCache(object):
             ensure_media_xml_base_ids(root)

             idmap = ensure_numeric_ids(root)
-            remap_playlist_references(root, idmap)
+            if len(idmap) > 0:
+                debug_print('fix_ids: found some non-numeric ids')
+                remap_playlist_references(root, idmap)
             if i == 0:
                 sourceid, playlist_sid = 1, 0
                 base = 0
@@ -276,38 +309,19 @@ class XMLCache(object):
     def update_booklist(self, bl, bl_index):
         if bl_index not in self.record_roots:
             return
-        if DEBUG:
-            debug_print('Updating JSON cache:', bl_index)
+        debug_print('Updating JSON cache:', bl_index)
+        playlist_map = self.build_id_playlist_map(bl_index)
         root = self.record_roots[bl_index]
-        pmap = self.get_playlist_map()[bl_index]
-        playlist_map = {}
-        for title, records in pmap:
-            for record in records:
-                path = record.get('path', None)
-                if path:
-                    if path not in playlist_map:
-                        playlist_map[path] = []
-                    playlist_map[path].append(title)
         lpath_map = self.build_lpath_map(root)
         for book in bl:
             record = lpath_map.get(book.lpath, None)
             if record is not None:
                 title = record.get('title', None)
                 if title is not None and title != book.title:
-                    if DEBUG:
-                        debug_print('Renaming title', book.title, 'to', title)
+                    debug_print('Renaming title', book.title, 'to', title)
                     book.title = title
-                # We shouldn't do this for Sonys, because the reader strips
-                # all but the first author.
-                # authors = record.get('author', None)
-                # if authors is not None:
-                #     authors = string_to_authors(authors)
-                #     if authors != book.authors:
-                #         if DEBUG:
-                #             prints('Renaming authors', book.authors, 'to',
-                #                     authors)
-                #         book.authors = authors
+                # Don't set the author, because the reader strips all but
+                # the first author.
                 for thumbnail in record.xpath(
                         'descendant::*[local-name()="thumbnail"]'):
                     for img in thumbnail.xpath(
@@ -318,47 +332,57 @@ class XMLCache(object):
                             book.thumbnail = raw
                             break
                     break
-                if book.lpath in playlist_map:
-                    tags = playlist_map[book.lpath]
-                    book.device_collections = tags
+                book.device_collections = playlist_map.get(book.lpath, [])
         debug_print('Finished updating JSON cache:', bl_index)
     # }}}

     # Update XML from JSON {{{
     def update(self, booklists, collections_attributes):
-        debug_print('Starting update XML from JSON')
-        playlist_map = self.get_playlist_map()
+        debug_print('Starting update', collections_attributes)
         for i, booklist in booklists.items():
-            if DEBUG:
-                debug_print('Updating XML Cache:', i)
+            playlist_map = self.build_id_playlist_map(i)
+            debug_print('Updating XML Cache:', i)
             root = self.record_roots[i]
             lpath_map = self.build_lpath_map(root)
+            gtz_count = ltz_count = 0
             for book in booklist:
                 path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
-                # record = self.book_by_lpath(book.lpath, root)
                 record = lpath_map.get(book.lpath, None)
                 if record is None:
                     record = self.create_text_record(root, i, book.lpath)
-                self.update_text_record(record, book, path, i)
-
-            bl_pmap = playlist_map[i]
-            self.update_playlists(i, root, booklist, bl_pmap,
-                    collections_attributes)
+                (gtz_count, ltz_count) = self.update_text_record(record, book,
+                                            path, i, gtz_count, ltz_count)
+                # Ensure the collections in the XML database are recorded for
+                # this book
+                if book.device_collections is None:
+                    book.device_collections = []
+                book.device_collections = playlist_map.get(book.lpath, [])
+            debug_print('Timezone votes: %d GMT, %d LTZ'%(gtz_count, ltz_count))
+            self.update_playlists(i, root, booklist, collections_attributes)
+        # Update the device collections because update playlist could have added
+        # some new ones.
+        debug_print('In update/ Starting refresh of device_collections')
+        for i, booklist in booklists.items():
+            playlist_map = self.build_id_playlist_map(i)
+            for book in booklist:
+                book.device_collections = playlist_map.get(book.lpath, [])
+        self.fix_ids()
+        debug_print('Finished update')
+
+    def rebuild_collections(self, booklist, bl_index):
+        if bl_index not in self.record_roots:
+            return
+        root = self.record_roots[bl_index]
+        self.update_playlists(bl_index, root, booklist, [])
         self.fix_ids()

-        # This is needed to update device_collections
-        for i, booklist in booklists.items():
-            self.update_booklist(booklist, i)
-
-        debug_print('Finished update XML from JSON')
-
-    def update_playlists(self, bl_index, root, booklist, playlist_map,
-            collections_attributes):
-        debug_print('Starting update_playlists')
+    def update_playlists(self, bl_index, root, booklist, collections_attributes):
+        debug_print('Starting update_playlists', collections_attributes, bl_index)
+        self.reset_existing_playlists_map()
         collections = booklist.get_collections(collections_attributes)
         lpath_map = self.build_lpath_map(root)
+        debug_print('update_playlists: finished building maps')
         for category, books in collections.items():
             records = [lpath_map.get(b.lpath, None) for b in books]
             # Remove any books that were not found, although this
@@ -367,25 +391,34 @@ class XMLCache(object):
                 debug_print('WARNING: Some elements in the JSON cache were not'
                         ' found in the XML cache')
             records = [x for x in records if x is not None]
+            # Ensure each book has an ID.
             for rec in records:
                 if rec.get('id', None) is None:
                     rec.set('id', str(self.max_id(root)+1))
             ids = [x.get('id', None) for x in records]
+            # Given that we set the ids, there shouldn't be any None's. But
+            # better to be safe...
             if None in ids:
-                if DEBUG:
-                    debug_print('WARNING: Some <text> elements do not have ids')
-                    ids = [x for x in ids if x is not None]
+                debug_print('WARNING: Some <text> elements do not have ids')
+                ids = [x for x in ids if x is not None]

             playlist = self.get_or_create_playlist(bl_index, category)
+            # Get the books currently in the playlist. We will need them to be
+            # sure to put back any books that were manually added.
             playlist_ids = []
             for item in playlist:
                 id_ = item.get('id', None)
                 if id_ is not None:
                     playlist_ids.append(id_)
+            # Empty the playlist. We do this so that the playlist will have the
+            # order specified by get_collections
             for item in list(playlist):
                 playlist.remove(item)
+
+            # Get a list of ids not known by get_collections
             extra_ids = [x for x in playlist_ids if x not in ids]
+            # Rebuild the collection in the order specified by get_collections. Then
+            # add the ids that get_collections didn't know about.
             for id_ in ids + extra_ids:
                 item = playlist.makeelement(
                         '{%s}item'%self.namespaces[bl_index],
@@ -423,11 +456,38 @@ class XMLCache(object):
         root.append(ans)
         return ans

-    def update_text_record(self, record, book, path, bl_index):
+    def update_text_record(self, record, book, path, bl_index, gtz_count, ltz_count):
+        '''
+        Update the Sony database from the book. This is done if the timestamp in
+        the db differs from the timestamp on the file.
+        '''
+
+        # It seems that a Sony device can sometimes know what timezone it is in,
+        # and apparently converts the dates to GMT when it writes them to the
+        # db. Unfortunately, we can't tell when it does this, so we use a
+        # horrible heuristic. First, set dates only for new books, trying to
+        # avoid upsetting the sony. Use the timezone determined through the
+        # voting described next. Second, voting: if a book is not new, compare
+        # its Sony DB date against localtime and gmtime. Count the matches. When
+        # we must set a date, use the one with the most matches. Use localtime
+        # if the case of a tie, and hope it is right.
         timestamp = os.path.getmtime(path)
-        date = strftime(timestamp)
-        if date != record.get('date', None):
+        rec_date = record.get('date', None)
+        if not getattr(book, '_new_book', False): # book is not new
+            if strftime(timestamp, zone=time.gmtime) == rec_date:
+                gtz_count += 1
+            elif strftime(timestamp, zone=time.localtime) == rec_date:
+                ltz_count += 1
+        else: # book is new. Set the time using the current votes
+            if ltz_count >= gtz_count:
+                tz = time.localtime
+                debug_print("Using localtime TZ for new book", book.lpath)
+            else:
+                tz = time.gmtime
+                debug_print("Using GMT TZ for new book", book.lpath)
+            date = strftime(timestamp, zone=tz)
             record.set('date', date)
         record.set('size', str(os.stat(path).st_size))
         title = book.title if book.title else _('Unknown')
         record.set('title', title)
@@ -452,6 +512,7 @@ class XMLCache(object):
         if 'id' not in record.attrib:
             num = self.max_id(record.getroottree().getroot())
             record.set('id', str(num+1))
+        return (gtz_count, ltz_count)
     # }}}

     # Writing the XML files {{{
@@ -544,10 +605,5 @@ class XMLCache(object):
                     break
             self.namespaces[i] = ns

-        # if DEBUG:
-        #     debug_print('Found nsmaps:')
-        #     pprint(self.nsmaps)
-        #     debug_print('Found namespaces:')
-        #     pprint(self.namespaces)
     # }}}
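The timezone handling in update_text_record above amounts to a small voting scheme. A minimal standalone sketch of that heuristic, using only the standard library (the date format and function names here are illustrative, not calibre's):

    import time

    DATE_FMT = '%a, %d %b %Y %H:%M:%S'   # assumed format, for illustration only

    def record_timezone_vote(file_mtime, db_date, gtz_count, ltz_count):
        # An existing book votes for whichever timezone reproduces the date
        # already stored in the Sony database.
        if time.strftime(DATE_FMT, time.gmtime(file_mtime)) == db_date:
            gtz_count += 1
        elif time.strftime(DATE_FMT, time.localtime(file_mtime)) == db_date:
            ltz_count += 1
        return gtz_count, ltz_count

    def date_for_new_book(file_mtime, gtz_count, ltz_count):
        # A new book is dated using the timezone with the most votes;
        # ties fall back to local time.
        zone = time.localtime if ltz_count >= gtz_count else time.gmtime
        return time.strftime(DATE_FMT, zone(file_mtime))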

@@ -11,10 +11,11 @@ from calibre.devices.mime import mime_type_ext
 from calibre.devices.interface import BookList as _BookList
 from calibre.constants import filesystem_encoding, preferred_encoding
 from calibre import isbytestring
+from calibre.utils.config import prefs

 class Book(MetaInformation):

-    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections']
+    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']

     JSON_ATTRS = [
         'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
@@ -29,6 +30,7 @@ class Book(MetaInformation):

         MetaInformation.__init__(self, '')

+        self._new_book = False
         self.device_collections = []
         self.path = os.path.join(prefix, lpath)
         if os.sep == '\\':
@@ -76,7 +78,7 @@ class Book(MetaInformation):
        in C{other} takes precedence, unless the information in C{other} is NULL.
        '''
-        MetaInformation.smart_update(self, other)
+        MetaInformation.smart_update(self, other, replace_tags=True)

         for attr in self.BOOK_ATTRS:
             if hasattr(other, attr):
@@ -132,10 +134,28 @@ class CollectionsBookList(BookList):

     def get_collections(self, collection_attributes):
         collections = {}
         series_categories = set([])
-        collection_attributes = list(collection_attributes)+['device_collections']
-        for attr in collection_attributes:
-            attr = attr.strip()
-            for book in self:
+        # This map of sets is used to avoid linear searches when testing for
+        # book equality
+        collections_lpaths = {}
+        for book in self:
+            # Make sure we can identify this book via the lpath
+            lpath = getattr(book, 'lpath', None)
+            if lpath is None:
+                continue
+            # Decide how we will build the collections. The default: leave the
+            # book in all existing collections. Do not add any new ones.
+            attrs = ['device_collections']
+            if getattr(book, '_new_book', False):
+                if prefs['preserve_user_collections']:
+                    # Ensure that the book is in all the book's existing
+                    # collections plus all metadata collections
+                    attrs += collection_attributes
+                else:
+                    # The book's existing collections are ignored. Put the book
+                    # in collections defined by its metadata.
+                    attrs = collection_attributes
+            for attr in attrs:
+                attr = attr.strip()
                 val = getattr(book, attr, None)
                 if not val: continue
                 if isbytestring(val):
@@ -150,11 +170,12 @@ class CollectionsBookList(BookList):
                     continue
                 if category not in collections:
                     collections[category] = []
-                if book not in collections[category]:
+                    collections_lpaths[category] = set()
+                if lpath not in collections_lpaths[category]:
+                    collections_lpaths[category].add(lpath)
                     collections[category].append(book)
                 if attr == 'series':
                     series_categories.add(category)
         # Sort collections
         for category, books in collections.items():
             def tgetter(x):
@@ -167,3 +188,15 @@ class CollectionsBookList(BookList):
             books.sort(cmp=lambda x,y:cmp(getter(x), getter(y)))

         return collections
+
+    def rebuild_collections(self, booklist, oncard):
+        '''
+        For each book in the booklist for the card oncard, remove it from all
+        its current collections, then add it to the collections specified in
+        device_collections.
+
+        oncard is None for the main memory, carda for card A, cardb for card B,
+        etc.
+
+        booklist is the object created by the :method:`books` call above.
+        '''
+        pass
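A condensed sketch of the per-book rule get_collections applies above (illustrative only; preserve_user_collections stands in for the calibre preference read through prefs):

    def attrs_for_book(book, collection_attributes, preserve_user_collections=True):
        # Default: keep the book in the collections it already belongs to on
        # the device and do not add any new ones.
        attrs = ['device_collections']
        if getattr(book, '_new_book', False):
            if preserve_user_collections:
                # New book: existing device collections plus metadata-driven ones.
                attrs = attrs + list(collection_attributes)
            else:
                # New book: metadata-driven collections only.
                attrs = list(collection_attributes)
        return attrs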

@@ -78,9 +78,6 @@ class Device(DeviceConfig, DevicePlugin):
     STORAGE_CARD_VOLUME_LABEL = ''
     STORAGE_CARD2_VOLUME_LABEL = None

-    SUPPORTS_SUB_DIRS = False
-    MUST_READ_METADATA = False
-    SUPPORTS_USE_AUTHOR_SORT = False

     EBOOK_DIR_MAIN = ''
     EBOOK_DIR_CARD_A = ''

@@ -13,6 +13,10 @@ class DeviceConfig(object):
     EXTRA_CUSTOMIZATION_MESSAGE = None
     EXTRA_CUSTOMIZATION_DEFAULT = None

+    SUPPORTS_SUB_DIRS = False
+    MUST_READ_METADATA = False
+    SUPPORTS_USE_AUTHOR_SORT = False
+
     #: If None the default is used
     SAVE_TEMPLATE = None

@@ -23,9 +27,14 @@ class DeviceConfig(object):
             config().parse().send_template

     @classmethod
-    def _config(cls):
+    def _config_base_name(cls):
         klass = cls if isinstance(cls, type) else cls.__class__
-        c = Config('device_drivers_%s' % klass.__name__, _('settings for device drivers'))
+        return klass.__name__
+
+    @classmethod
+    def _config(cls):
+        name = cls._config_base_name()
+        c = Config('device_drivers_%s' % name, _('settings for device drivers'))
         c.add_opt('format_map', default=cls.FORMATS,
             help=_('Ordered list of formats the device will accept'))
         c.add_opt('use_subdirs', default=True,

@@ -233,6 +233,7 @@ class USBMS(CLI, Device):
                 book = self.book_class(prefix, lpath, other=info)
                 if book.size is None:
                     book.size = os.stat(self.normalize_path(path)).st_size
+                book._new_book = True # Must be before add_book
                 booklists[blist].add_book(book, replace_metadata=True)
         self.report_progress(1.0, _('Adding books to device metadata listing...'))
         debug_print('USBMS: finished adding metadata')
@@ -273,6 +274,9 @@ class USBMS(CLI, Device):
         self.report_progress(1.0, _('Removing books from device metadata listing...'))
         debug_print('USBMS: finished removing metadata for %d books'%(len(paths)))

+    # If you override this method and you use book._new_book, then you must
+    # complete the processing before you call this method. The flag is cleared
+    # at the end just before the return
     def sync_booklists(self, booklists, end_session=True):
         debug_print('USBMS: starting sync_booklists')

@@ -286,11 +290,18 @@ class USBMS(CLI, Device):
                 js = [item.to_json() for item in booklists[listid] if
                       hasattr(item, 'to_json')]
                 with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:
-                    json.dump(js, f, indent=2, encoding='utf-8')
+                    f.write(json.dumps(js, indent=2, encoding='utf-8'))

         write_prefix(self._main_prefix, 0)
         write_prefix(self._card_a_prefix, 1)
         write_prefix(self._card_b_prefix, 2)

+        # Clear the _new_book indication, as we are supposed to be done with
+        # adding books at this point
+        for blist in booklists:
+            if blist is not None:
+                for book in blist:
+                    book._new_book = False
+
         self.report_progress(1.0, _('Sending metadata to device...'))
         debug_print('USBMS: finished sync_booklists')

@@ -49,7 +49,6 @@ class CHMInput(InputFormatPlugin):
             log.debug('stream.name=%s' % stream.name)
             mainname = self._chmtohtml(tdir, chm_name, no_images, log)
             mainpath = os.path.join(tdir, mainname)
-            #raw_input()

             metadata = get_metadata_from_reader(self._chm_reader)
@@ -92,7 +91,7 @@ class CHMInput(InputFormatPlugin):
             metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
         if not metadata.language:
             oeb.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
+            metadata.add('language', get_lang().replace('_', '-'))
         if not metadata.creator:
             oeb.logger.warn('Creator not specified')
             metadata.add('creator', _('Unknown'))
@@ -141,10 +140,9 @@ class CHMInput(InputFormatPlugin):
         log.debug('Found %d section nodes' % len(chapters))
         htmlpath = os.path.splitext(hhcpath)[0] + ".html"
         f = open(htmlpath, 'wb')
-        f.write('<html><head><meta http-equiv="Content-type"'
-                ' content="text/html;charset=UTF-8" /></head><body>\n')
         if chapters:
+            f.write('<html><head><meta http-equiv="Content-type"'
+                    ' content="text/html;charset=UTF-8" /></head><body>\n')
             path0 = chapters[0][1]
             subpath = os.path.dirname(path0)
@@ -158,7 +156,9 @@ class CHMInput(InputFormatPlugin):
                 url = url.encode('utf-8')
                 f.write(url)
             f.write("</body></html>")
+        else:
+            f.write(hhcdata)
         f.close()
         return htmlpath

@@ -8,7 +8,7 @@ import os, re
 from mimetypes import guess_type as guess_mimetype
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
-from calibre.constants import iswindows
+from calibre.constants import iswindows, filesystem_encoding
 from calibre.utils.chm.chm import CHMFile
 from calibre.utils.chm.chmlib import (
   CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -78,6 +78,8 @@ class CHMError(Exception):
 class CHMReader(CHMFile):
     def __init__(self, input, log):
         CHMFile.__init__(self)
+        if isinstance(input, unicode):
+            input = input.encode(filesystem_encoding)
         if not self.LoadCHM(input):
             raise CHMError("Unable to open CHM file '%s'"%(input,))
         self.log = log
@@ -91,7 +93,6 @@ class CHMReader(CHMFile):
         self.root, ext = os.path.splitext(self.topics.lstrip('/'))
         self.hhc_path = self.root + ".hhc"

     def _parse_toc(self, ul, basedir=os.getcwdu()):
         toc = TOC(play_order=self._playorder, base_path=basedir, text='')
         self._playorder += 1
@@ -152,6 +153,8 @@ class CHMReader(CHMFile):
             if f.lower() == self.hhc_path.lower():
                 self.hhc_path = f
                 break
+        if self.hhc_path not in files and files:
+            self.hhc_path = files[0]

     def _reformat(self, data):
         try:
@@ -159,7 +162,7 @@ class CHMReader(CHMFile):
             soup = BeautifulSoup(data)
         except ValueError:
             # hit some strange encoding problems...
-            print "Unable to parse html for cleaning, leaving it :("
+            self.log.exception("Unable to parse html for cleaning, leaving it")
             return data
         # nuke javascript...
         [s.extract() for s in soup('script')]

@@ -151,6 +151,7 @@ cpalmdoc_do_compress(buffer *b, char *output) {
                 for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j];
             }
     }
+    PyMem_Free(temp.data);
     return output - head;
 }

@@ -168,7 +169,9 @@ cpalmdoc_compress(PyObject *self, PyObject *args) {
     for (j = 0; j < input_len; j++)
         b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
     b.len = input_len;
-    output = (char *)PyMem_Malloc(sizeof(char) * b.len);
+    // Make the output buffer larger than the input as sometimes
+    // compression results in a larger block
+    output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len));
     if (output == NULL) return PyErr_NoMemory();
     j = cpalmdoc_do_compress(&b, output);
     if ( j == 0) return PyErr_NoMemory();

@@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)

 LIGATURES = {
-        u'\u00c6': u'AE',
-        u'\u00e6': u'ae',
-        u'\u0152': u'OE',
-        u'\u0153': u'oe',
-        u'\u0132': u'IJ',
-        u'\u0133': u'ij',
-        u'\u1D6B': u'ue',
+        # u'\u00c6': u'AE',
+        # u'\u00e6': u'ae',
+        # u'\u0152': u'OE',
+        # u'\u0153': u'oe',
+        # u'\u0132': u'IJ',
+        # u'\u0133': u'ij',
+        # u'\u1D6B': u'ue',
         u'\uFB00': u'ff',
         u'\uFB01': u'fi',
         u'\uFB02': u'fl',
@@ -107,9 +107,21 @@ class CSSPreProcessor(object):

     PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')

-    def __call__(self, data):
+    def __call__(self, data, add_namespace=False):
+        from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
         data = self.PAGE_PAT.sub('', data)
-        return data
+        if not add_namespace:
+            return data
+        ans, namespaced = [], False
+        for line in data.splitlines():
+            ll = line.lstrip()
+            if not (namespaced or ll.startswith('@import') or
+                        ll.startswith('@charset')):
+                ans.append(XHTML_CSS_NAMESPACE.strip())
+                namespaced = True
+            ans.append(line)
+        return u'\n'.join(ans)

 class HTMLPreProcessor(object):

@@ -268,7 +280,7 @@ class HTMLPreProcessor(object):
         if getattr(self.extra_opts, 'remove_footer', None):
             try:
-                rules.insert(0
+                rules.insert(0,
                     (re.compile(self.extra_opts.footer_regex), lambda match : '')
                 )
             except:
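The add_namespace branch added to CSSPreProcessor.__call__ above inserts a default namespace rule before the first statement that is not an @import or @charset. A self-contained approximation (the namespace string is assumed; calibre takes its value from XHTML_CSS_NAMESPACE):

    XHTML_CSS_NAMESPACE = '@namespace "http://www.w3.org/1999/xhtml";'  # assumed value

    def add_default_namespace(css):
        # Insert the namespace rule before the first line that is neither an
        # @import nor an @charset, leaving everything else untouched.
        out, done = [], False
        for line in css.splitlines():
            ll = line.lstrip()
            if not (done or ll.startswith('@import') or ll.startswith('@charset')):
                out.append(XHTML_CSS_NAMESPACE)
                done = True
            out.append(line)
        return '\n'.join(out)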

@@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize import Plugin
class InvalidEpub(ValueError):
pass
class ePubFixer(Plugin):
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal'
type = _('ePub Fixer')
can_be_disabled = True
# API that subclasses must implement {{{
@property
def short_description(self):
raise NotImplementedError
@property
def long_description(self):
raise NotImplementedError
@property
def fix_name(self):
raise NotImplementedError
@property
def options(self):
'''
Return a list of 4-tuples
(option_name, type, default, help_text)
type is one of 'bool', 'int', 'string'
'''
return []
def run(self, container, opts, log, fix=False):
raise NotImplementedError
# }}}
def add_options_to_parser(self, parser):
parser.add_option('--' + self.fix_name.replace('_', '-'),
help=self.long_description, action='store_true', default=False)
for option in self.options:
action = 'store'
if option[1] == 'bool':
action = 'store_true'
kwargs = {'action': action, 'default':option[2], 'help':option[3]}
if option[1] != 'bool':
kwargs['type'] = option[1]
parser.add_option('--'+option[0].replace('_', '-'), **kwargs)
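A minimal subclass sketch (hypothetical, for illustration only) showing how the plugin API above fits together; add_options_to_parser turns fix_name into a --dummy switch and each options tuple into its own switch:

    class DummyFixer(ePubFixer):

        name = 'Dummy fixer'

        @property
        def short_description(self):
            return 'Do nothing (example only)'

        @property
        def long_description(self):
            return 'A do-nothing fixer that only illustrates the plugin API.'

        @property
        def fix_name(self):
            return 'dummy'

        @property
        def options(self):
            # (option_name, type, default, help_text) -> a --verbose-dummy switch
            return [('verbose_dummy', 'bool', False, 'Print extra information')]

        def run(self, container, opts, log, fix=False):
            log('dummy fixer ran, fix=%r' % fix)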

@@ -0,0 +1,182 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, posixpath, urllib, sys
from lxml import etree
from calibre.ebooks.epub.fix import InvalidEpub
from calibre import guess_type, prepare_string_for_xml
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import iswindows
from calibre.utils.zipfile import ZipFile, ZIP_STORED
exists, join = os.path.exists, os.path.join
OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'
class Container(object):
META_INF = {
'container.xml' : True,
'manifest.xml' : False,
'encryption.xml' : False,
'metadata.xml' : False,
'signatures.xml' : False,
'rights.xml' : False,
}
def __init__(self, path, log):
self.root = os.path.abspath(path)
self.log = log
self.dirtied = set([])
self.cache = {}
self.mime_map = {}
if exists(join(self.root, 'mimetype')):
os.remove(join(self.root, 'mimetype'))
container_path = join(self.root, 'META-INF', 'container.xml')
if not exists(container_path):
raise InvalidEpub('No META-INF/container.xml in epub')
self.container = etree.fromstring(open(container_path, 'rb').read())
opf_files = self.container.xpath((
r'child::ocf:rootfiles/ocf:rootfile'
'[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
), namespaces={'ocf':OCF_NS}
)
if not opf_files:
raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
opf_path = os.path.join(self.root,
*opf_files[0].get('full-path').split('/'))
if not exists(opf_path):
raise InvalidEpub('OPF file does not exist at location pointed to'
' by META-INF/container.xml')
# Map of relative paths with / separators to absolute
# paths on filesystem with os separators
self.name_map = {}
for dirpath, dirnames, filenames in os.walk(self.root):
for f in filenames:
path = join(dirpath, f)
name = os.path.relpath(path, self.root).replace(os.sep, '/')
self.name_map[name] = path
if path == opf_path:
self.opf_name = name
self.mime_map[name] = guess_type('a.opf')[0]
for item in self.opf.xpath(
'//opf:manifest/opf:item[@href and @media-type]',
namespaces={'opf':OPF_NS}):
href = item.get('href')
self.mime_map[self.href_to_name(href,
posixpath.dirname(self.opf_name))] = item.get('media-type')
def manifest_worthy_names(self):
for name in self.name_map:
if name.endswith('.opf'): continue
if name.startswith('META-INF') and \
posixpath.basename(name) in self.META_INF: continue
yield name
def delete_name(self, name):
self.mime_map.pop(name, None)
path = self.name_map[name]
os.remove(path)
self.name_map.pop(name)
def manifest_item_for_name(self, name):
href = self.name_to_href(name,
posixpath.dirname(self.opf_name))
q = prepare_string_for_xml(href, attribute=True)
existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
namespaces={'opf':OPF_NS})
if not existing:
return None
return existing[0]
def add_name_to_manifest(self, name):
item = self.manifest_item_for_name(name)
if item is not None:
return
manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
id=self.generate_manifest_id())
mt = guess_type(posixpath.basename(name))[0]
if not mt:
mt = 'application/octet-stream'
item.set('media-type', mt)
manifest.append(item)
def generate_manifest_id(self):
items = self.opf.xpath('//opf:manifest/opf:item[@id]',
namespaces={'opf':OPF_NS})
ids = set([x.get('id') for x in items])
for x in xrange(sys.maxint):
c = 'id%d'%x
if c not in ids:
return c
@property
def opf(self):
return self.get(self.opf_name)
def href_to_name(self, href, base=''):
href = urllib.unquote(href.partition('#')[0])
name = href
if base:
name = posixpath.join(base, href)
return name
def name_to_href(self, name, base):
if not base:
return name
return posixpath.relpath(name, base)
def get_raw(self, name):
path = self.name_map[name]
return open(path, 'rb').read()
def get(self, name):
if name in self.cache:
return self.cache[name]
raw = self.get_raw(name)
if name in self.mime_map:
raw = self._parse(raw, self.mime_map[name])
self.cache[name] = raw
return raw
def set(self, name, val):
self.cache[name] = val
self.dirtied.add(name)
def _parse(self, raw, mimetype):
mt = mimetype.lower()
if mt.endswith('+xml'):
parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
return etree.fromstring(xml_to_unicode(raw,
strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
return raw
def write(self, path):
for name in self.dirtied:
data = self.cache[name]
raw = data
if hasattr(data, 'xpath'):
raw = etree.tostring(data, encoding='utf-8',
xml_declaration=True)
with open(self.name_map[name], 'wb') as f:
f.write(raw)
self.dirtied.clear()
zf = ZipFile(path, 'w')
zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
compression=ZIP_STORED)
zf.add_dir(self.root)
zf.close()
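The name/href helpers above are thin wrappers over posixpath, with names keyed relative to the epub root. A rough usage sketch (paths hypothetical):

    import posixpath

    opf_dir = 'OEBPS'                     # hypothetical directory holding the OPF
    href = '../images/cover.jpg'          # hypothetical manifest href

    # href_to_name: join the href onto the OPF directory (no normalisation is done)
    name = posixpath.join(opf_dir, href)  # 'OEBPS/../images/cover.jpg'

    # name_to_href: express a name relative to the OPF directory
    back = posixpath.relpath('images/cover.jpg', opf_dir)   # '../images/cover.jpg'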

@@ -0,0 +1,82 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
from calibre.utils.date import parse_date, strptime
class Epubcheck(ePubFixer):
name = 'Workaround epubcheck bugs'
@property
def short_description(self):
return _('Workaround epubcheck bugs')
@property
def long_description(self):
return _('Workarounds for bugs in the latest release of epubcheck. '
'epubcheck reports many things as errors that are not '
'actually errors. %prog will try to detect these and replace '
'them with constructs that epubcheck likes. This may cause '
'significant changes to your epub, complain to the epubcheck '
'project.')
@property
def fix_name(self):
return 'epubcheck'
def fix_pubdates(self):
dirtied = False
opf = self.container.opf
for dcdate in opf.xpath('//dc:date',
namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
raw = dcdate.text
if not raw: raw = ''
default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
try:
ts = parse_date(raw, assume_utc=False, as_utc=True,
default=default)
except:
raise InvalidEpub('Invalid date set in OPF', raw)
sval = ts.strftime('%Y-%m-%d')
if sval != raw:
self.log.error(
'OPF contains date', raw, 'that epubcheck does not like')
if self.fix:
dcdate.text = sval
self.log('\tReplaced', raw, 'with', sval)
dirtied = True
if dirtied:
self.container.set(self.container.opf_name, opf)
def fix_preserve_aspect_ratio(self):
for name in self.container.name_map:
mt = self.container.mime_map.get(name, '')
if mt.lower() == 'application/xhtml+xml':
root = self.container.get(name)
dirtied = False
for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
namespaces={'svg':'http://www.w3.org/2000/svg'}):
self.log.error('Found <svg> element with'
' preserveAspectRatio="none" which epubcheck '
'cannot handle')
if self.fix:
svg.set('preserveAspectRatio', 'xMidYMid meet')
dirtied = True
self.log('\tReplaced none with xMidYMid meet')
if dirtied:
self.container.set(name, root)
def run(self, container, opts, log, fix=False):
self.container = container
self.opts = opts
self.log = log
self.fix = fix
self.fix_pubdates()
self.fix_preserve_aspect_ratio()
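fix_pubdates above normalises every dc:date to the plain %Y-%m-%d form that epubcheck accepts. A standard-library approximation of that normalisation (illustrative; calibre itself uses parse_date with a default of 2000-01-01):

    from datetime import datetime

    def normalised_pubdate(raw, default=datetime(2000, 1, 1)):
        # Parse a few common date forms and re-emit as YYYY-MM-DD, falling
        # back to the default when the field is empty or unparseable.
        raw = (raw or '').strip()
        for fmt in ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d', '%d %b %Y'):
            try:
                return datetime.strptime(raw, fmt).strftime('%Y-%m-%d')
            except ValueError:
                pass
        return default.strftime('%Y-%m-%d')

    print(normalised_pubdate('2009-03-04T12:00:00'))   # 2009-03-04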

@@ -0,0 +1,56 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os
from calibre.utils.config import OptionParser
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.zipfile import ZipFile
from calibre.utils.logging import default_log
from calibre.customize.ui import epub_fixers
from calibre.ebooks.epub.fix.container import Container
def option_parser():
parser = OptionParser(usage=_(
'%prog [options] file.epub\n\n'
'Fix common problems in EPUB files that can cause them '
'to be rejected by poorly designed publishing services.\n\n'
'By default, no fixing is done and messages are printed out '
'for each error detected. Use the options to control which errors '
'are automatically fixed.'))
for fixer in epub_fixers():
fixer.add_options_to_parser(parser)
return parser
def run(epub, opts, log):
with TemporaryDirectory('_epub-fix') as tdir:
with CurrentDir(tdir):
zf = ZipFile(epub)
zf.extractall()
zf.close()
container = Container(tdir, log)
for fixer in epub_fixers():
fix = getattr(opts, fixer.fix_name, False)
fixer.run(container, opts, log, fix=fix)
container.write(epub)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
default_log.error(_('You must specify an epub file'))
return
epub = os.path.abspath(args[1])
run(epub, opts, default_log)
if __name__ == '__main__':
main()
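For reference, the same pipeline that main() drives can be invoked directly; a minimal sketch using the functions defined in this file (the epub file name and the chosen switch are hypothetical, and the snippet assumes it runs in this module's namespace):

    import os
    from calibre.utils.logging import default_log

    # Equivalent to running: epub-fix --unmanifested my_book.epub
    opts, args = option_parser().parse_args(['epub-fix', '--unmanifested', 'my_book.epub'])
    run(os.path.abspath(args[1]), opts, default_log)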

@@ -0,0 +1,49 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer
class Unmanifested(ePubFixer):
name = 'Fix unmanifested files'
@property
def short_description(self):
return _('Fix unmanifested files')
@property
def long_description(self):
return _('Fix unmanifested files. %prog can either add them to '
'the manifest or delete them as specified by the '
'delete unmanifested option.')
@property
def fix_name(self):
return 'unmanifested'
@property
def options(self):
return [('delete_unmanifested', 'bool', False,
_('Delete unmanifested files instead of adding them to the manifest'))]
def run(self, container, opts, log, fix=False):
dirtied = False
for name in list(container.manifest_worthy_names()):
item = container.manifest_item_for_name(name)
if item is None:
log.error(name, 'not in manifest')
if fix:
if opts.delete_unmanifested:
container.delete_name(name)
log('\tDeleted')
else:
container.add_name_to_manifest(name)
log('\tAdded to manifest')
dirtied = True
if dirtied:
container.set(container.opf_name, container.opf)

@@ -84,7 +84,7 @@ class EPUBOutput(OutputFormatPlugin):
         OptionRecommendation(name='no_svg_cover', recommended_value=False,
             help=_('Do not use SVG for the book cover. Use this option if '
-                'your EPUB is going to be used ona device that does not '
+                'your EPUB is going to be used on a device that does not '
                 'support SVG, like the iPhone or the JetBook Lite. '
                 'Without this option, such devices will display the cover '
                 'as a blank page.')
@@ -380,10 +380,9 @@ class EPUBOutput(OutputFormatPlugin):
                 sel = '.'+lb.get('class')
                 for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                     if sel == rule.selectorList.selectorText:
-                        val = rule.style.removeProperty('margin-left')
-                        pval = rule.style.getProperty('padding-left')
-                        if val and not pval:
-                            rule.style.setProperty('padding-left', val)
+                        rule.style.removeProperty('margin-left')
+                        # padding-left breaks rendering in webkit and gecko
+                        rule.style.removeProperty('padding-left')
     # }}}

@@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isfreebsd
+from calibre.constants import islinux, isfreebsd, iswindows
 from calibre import unicode_path
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@@ -32,9 +32,14 @@ class Link(object):

     @classmethod
     def url_to_local_path(cls, url, base):
-        path = urlunparse(('', '', url.path, url.params, url.query, ''))
+        path = url.path
+        isabs = False
+        if iswindows and path.startswith('/'):
+            path = path[1:]
+            isabs = True
+        path = urlunparse(('', '', path, url.params, url.query, ''))
         path = unquote(path)
-        if os.path.isabs(path):
+        if isabs or os.path.isabs(path):
             return path
         return os.path.abspath(os.path.join(base, path))

@@ -307,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
                 xpath
         from calibre import guess_type
         import cssutils
+        self.OEB_STYLES = OEB_STYLES
         oeb = create_oebbook(log, None, opts, self,
                 encoding=opts.input_encoding, populate=False)
         self.oeb = oeb
@@ -323,7 +329,7 @@ class HTMLInput(InputFormatPlugin):
             metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
         if not metadata.language:
             oeb.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
+            metadata.add('language', get_lang().replace('_', '-'))
         if not metadata.creator:
             oeb.logger.warn('Creator not specified')
             metadata.add('creator', self.oeb.translate(__('Unknown')))
@@ -371,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
             rewrite_links(item.data, partial(self.resource_adder, base=dpath))

         for item in oeb.manifest.values():
-            if item.media_type in OEB_STYLES:
+            if item.media_type in self.OEB_STYLES:
                 dpath = None
                 for path, href in self.added_resources.items():
                     if href == item.href:
@@ -409,12 +415,30 @@ class HTMLInput(InputFormatPlugin):
             oeb.container = DirContainer(os.getcwdu(), oeb.log)
         return oeb

+    def link_to_local_path(self, link_, base=None):
+        if not isinstance(link_, unicode):
+            try:
+                link_ = link_.decode('utf-8', 'error')
+            except:
+                self.log.warn('Failed to decode link %r. Ignoring'%link_)
+                return None, None
+        try:
+            l = Link(link_, base if base else os.getcwdu())
+        except:
+            self.log.exception('Failed to process link: %r'%link_)
+            return None, None
+        if l.path is None:
+            # Not a local resource
+            return None, None
+        link = l.path.replace('/', os.sep).strip()
+        frag = l.fragment
+        if not link:
+            return None, None
+        return link, frag
+
     def resource_adder(self, link_, base=None):
-        link = self.urlnormalize(link_)
-        link, frag = self.urldefrag(link)
-        link = unquote(link).replace('/', os.sep)
-        if not link.strip():
+        link, frag = self.link_to_local_path(link_, base=base)
+        if link is None:
             return link_
         try:
             if base and not os.path.isabs(link):
@@ -442,6 +466,9 @@ class HTMLInput(InputFormatPlugin):
             item = self.oeb.manifest.add(id, href, media_type)
             item.html_input_href = bhref
+            if guessed in self.OEB_STYLES:
+                item.override_css_fetch = partial(
+                        self.css_import_handler, os.path.dirname(link))
             item.data
         self.added_resources[link] = href

@@ -450,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
             nlink = '#'.join((nlink, frag))
         return nlink

+    def css_import_handler(self, base, href):
+        link, frag = self.link_to_local_path(href, base=base)
+        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
+            return (None, None)
+        try:
+            raw = open(link, 'rb').read().decode('utf-8', 'replace')
+            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
+        except:
+            self.log.exception('Failed to read CSS file: %r'%link)
+            return (None, None)
+        return (None, raw)

@@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
         xml = d.to_xml(write_files=True)
         if options.verbose > 2:
             open('lrs.xml', 'wb').write(xml.encode('utf-8'))
-        parser = etree.XMLParser(recover=True, no_network=True)
+        parser = etree.XMLParser(no_network=True, huge_tree=True)
         doc = etree.fromstring(xml, parser=parser)
         char_button_map = {}
         for x in doc.xpath('//CharButton[@refobj]'):

@@ -870,7 +870,7 @@ class Text(LRFStream):
         open_containers = collections.deque()
         for c in self.content:
             if isinstance(c, basestring):
-                s += prepare_string_for_xml(c)
+                s += prepare_string_for_xml(c).replace('\0', '')
             elif c is None:
                 if open_containers:
                     p = open_containers.pop()
